]> jfr.im git - yt-dlp.git/blame - youtube_dlc/extractor/youtube.py
[viki] Fix extractor (Closes #91)
[yt-dlp.git] / youtube_dlc / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
0ca96d48 5import itertools
c5e8d7af 6import json
c4417ddb 7import os.path
d77ab8e2 8import random
c5e8d7af 9import re
8a784c74 10import time
e0df6211 11import traceback
c5e8d7af 12
b05654f0 13from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 14from ..compat import (
edf3e38e 15 compat_chr,
29f7c58a 16 compat_HTTPError,
8d81f3e3 17 compat_kwargs,
c5e8d7af 18 compat_parse_qs,
545cc85d 19 compat_str,
7fd002c0 20 compat_urllib_parse_unquote_plus,
15707c7e 21 compat_urllib_parse_urlencode,
7c80519c 22 compat_urllib_parse_urlparse,
7c61bd36 23 compat_urlparse,
4bb4a188 24)
545cc85d 25from ..jsinterp import JSInterpreter
4bb4a188 26from ..utils import (
c5e8d7af 27 clean_html,
c5e8d7af 28 ExtractorError,
b60419c5 29 format_field,
2d30521a 30 float_or_none,
dd27fd17 31 int_or_none,
94278f72 32 mimetype2ext,
6310acf5 33 parse_codecs,
7c80519c 34 parse_duration,
dca3ff4a 35 qualities,
3995d37d 36 remove_start,
cf7e015f 37 smuggle_url,
dbdaaa23 38 str_or_none,
c93d53f5 39 str_to_int,
556dbe7f 40 try_get,
c5e8d7af
PH
41 unescapeHTML,
42 unified_strdate,
cf7e015f 43 unsmuggle_url,
8bdd16b4 44 update_url_query,
21c340b8 45 url_or_none,
6e6bc8da 46 urlencode_postdata,
8bdd16b4 47 urljoin,
c5e8d7af
PH
48)
49
5f6a1245 50
de7f3446 51class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
52 """Provide base functions for Youtube extractors"""
# Google account endpoints. _LOGIN_URL, _LOOKUP_URL, _CHALLENGE_URL and
# _TFA_URL are the ones requested by _login.
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

_LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
_CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
# {0} is filled in by _login with the TL token taken from the challenge response
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

# Regex alternation of fixed youtube.com path names.
# NOTE(review): presumably used to keep these paths from being mistaken for
# channel/user names -- confirm against the users of this constant.
_RESERVED_NAMES = (
    r'embed|e|watch_popup|channel|c|user|playlist|watch|w|v|movies|results|shared|hashtag|'
    r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|'
    r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)')

_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False

# Regex for playlist ids: either a known prefix followed by at least 10 id
# characters, or one of the fixed ids RDMM / WL / LL / LM
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 70
25f14e9f
S
71 def _ids_to_results(self, ids):
72 return [
73 self.url_result(vid_id, 'Youtube', video_id=vid_id)
74 for vid_id in ids]
75
def _login(self):
    """
    Attempt to log in to YouTube.

    Runs the account lookup and the password challenge and, when the server
    asks for it, the two-step verification exchange; finally requests the
    CheckCookie URL returned by the server.

    True is returned if successful or skipped (no username available).
    False is returned on every failure path.

    If _LOGIN_REQUIRED is set and neither login info nor a cookie file was
    provided, an ExtractorError is raised.
    """
    username, password = self._get_login_info()
    # No authentication to be performed
    if username is None:
        if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
            raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
        return True

    login_page = self._download_webpage(
        self._LOGIN_URL, None,
        note='Downloading login page',
        errnote='unable to fetch login page', fatal=False)
    if login_page is False:
        # Explicit False (not a bare return) to honour the documented contract
        return False

    login_form = self._hidden_inputs(login_page)

    # Sent as part of both the lookup and the challenge request
    service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

    def req(url, f_req, note, errnote):
        """POST the login form's hidden inputs plus the JSON-encoded f_req.

        The download is non-fatal; the callers below treat a False result
        as a failed request.
        """
        data = login_form.copy()
        data.update({
            'pstMsg': 1,
            'checkConnection': 'youtube',
            'checkedDomains': 'youtube',
            'hl': 'en',
            'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
            'f.req': json.dumps(f_req),
            'flowName': 'GlifWebSignIn',
            'flowEntry': 'ServiceLogin',
            # TODO: reverse actual botguard identifier generation algo
            'bgRequest': '["identifier",""]',
        })
        return self._download_json(
            url, None, note=note, errnote=errnote,
            # Drop everything in front of the first '[' before JSON parsing
            transform_source=lambda s: re.sub(r'^[^[]*', '', s),
            fatal=False,
            data=urlencode_postdata(data), headers={
                'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                'Google-Accounts-XSRF': 1,
            })

    def warn(message):
        self._downloader.report_warning(message)

    lookup_req = [
        username,
        None, [], None, 'US', None, None, 2, False, True,
        [
            None, None,
            [2, 1, None, 1,
             service_login_url,
             None, [], 4],
            1, [None, None, []], None, None, None, True
        ],
        username,
    ]

    lookup_results = req(
        self._LOOKUP_URL, lookup_req,
        'Looking up account info', 'Unable to look up account info')

    if lookup_results is False:
        return False

    user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
    if not user_hash:
        warn('Unable to extract user hash')
        return False

    challenge_req = [
        user_hash,
        None, 1, None, [1, None, None, None, [password, None, True]],
        [
            None, None, [2, 1, None, 1, service_login_url, None, [], 4],
            1, [None, None, []], None, None, None, True
        ]]

    challenge_results = req(
        self._CHALLENGE_URL, challenge_req,
        'Logging in', 'Unable to log in')

    if challenge_results is False:
        return False

    login_res = try_get(challenge_results, lambda x: x[0][5], list)
    if login_res:
        login_msg = try_get(login_res, lambda x: x[5], compat_str)
        # Parenthesised: '%' binds tighter than the conditional expression,
        # so without the parentheses the prefix is lost in the else branch
        warn(
            'Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
        return False

    res = try_get(challenge_results, lambda x: x[0][-1], list)
    if not res:
        warn('Unable to extract result entry')
        return False

    login_challenge = try_get(res, lambda x: x[0][0], list)
    if login_challenge:
        challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
        if challenge_str == 'TWO_STEP_VERIFICATION':
            # SEND_SUCCESS - TFA code has been successfully sent to phone
            # QUOTA_EXCEEDED - reached the limit of TFA codes
            status = try_get(login_challenge, lambda x: x[5], compat_str)
            if status == 'QUOTA_EXCEEDED':
                warn('Exceeded the limit of TFA codes, try later')
                return False

            tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
            if not tl:
                warn('Unable to extract TL')
                return False

            tfa_code = self._get_tfa_info('2-step verification code')

            if not tfa_code:
                warn(
                    'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                    '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            tfa_code = remove_start(tfa_code, 'G-')

            tfa_req = [
                user_hash, None, 2, None,
                [
                    9, None, None, None, None, None, None, None,
                    [None, tfa_code, True, 2]
                ]]

            tfa_results = req(
                self._TFA_URL.format(tl), tfa_req,
                'Submitting TFA code', 'Unable to submit TFA code')

            if tfa_results is False:
                return False

            tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
            if tfa_res:
                tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                # Same precedence fix as for the password challenge above
                warn(
                    'Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                return False

            check_cookie_url = try_get(
                tfa_results, lambda x: x[0][-1][2], compat_str)
        else:
            CHALLENGES = {
                'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
            }
            challenge = CHALLENGES.get(
                challenge_str,
                '%s returned error %s.' % (self.IE_NAME, challenge_str))
            warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
            return False
    else:
        check_cookie_url = try_get(res, lambda x: x[2], compat_str)

    if not check_cookie_url:
        warn('Unable to extract CheckCookie URL')
        return False

    check_cookie_results = self._download_webpage(
        check_cookie_url, None, 'Checking cookie', fatal=False)

    if check_cookie_results is False:
        return False

    # The login counts as successful only if the page mentions myaccount.google.com
    if 'https://myaccount.google.com/' not in check_cookie_results:
        warn('Unable to log in')
        return False

    return True
260
def _download_webpage_handle(self, *args, **kwargs):
    """Forward to the parent implementation with a private copy of `query`.

    The query mapping is copied so the caller's dict is never shared with
    the parent call. A missing or None `query` is replaced by an empty dict.
    """
    # `or {}` also covers an explicit query=None, which would otherwise
    # raise AttributeError on .copy()
    kwargs['query'] = (kwargs.get('query') or {}).copy()
    return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
        *args, **compat_kwargs(kwargs))
266
b2e8bc1b
JMF
267 def _real_initialize(self):
268 if self._downloader is None:
269 return
b2e8bc1b
JMF
270 if not self._login():
271 return
c5e8d7af 272
# Base JSON body for youtubei/v1 requests; _call_api copies it and merges the
# caller's query on top
_DEFAULT_API_DATA = {
    'context': {
        'client': {
            'clientName': 'WEB',
            'clientVersion': '2.20201021.03.00',
        }
    },
}

# Matches the ytInitialData assignment in a page and captures the JSON object
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
# Matches the ytInitialPlayerResponse assignment and captures the JSON object
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
# Text that may follow the JSON object; _extract_yt_initial_data appends it to
# _YT_INITIAL_DATA_RE to form its first (stricter) pattern
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 285
545cc85d 286 def _call_api(self, ep, query, video_id, fatal=True):
8bdd16b4 287 data = self._DEFAULT_API_DATA.copy()
288 data.update(query)
9833e7a0 289
545cc85d 290 return self._download_json(
8bdd16b4 291 'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
292 note='Downloading API JSON', errnote='Unable to download API page',
545cc85d 293 data=json.dumps(data).encode('utf8'), fatal=fatal,
8bdd16b4 294 headers={'content-type': 'application/json'},
295 query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
c54f4aad 296
8bdd16b4 297 def _extract_yt_initial_data(self, video_id, webpage):
298 return self._parse_json(
299 self._search_regex(
29f7c58a 300 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
a0566bbf 301 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
8bdd16b4 302 video_id)
0c148415 303
29f7c58a 304 def _extract_ytcfg(self, video_id, webpage):
305 return self._parse_json(
306 self._search_regex(
307 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
308 default='{}'), video_id, fatal=False)
309
def _extract_video(self, renderer):
    """Build a url_transparent result dict from a video renderer mapping.

    Title, description, duration, view count and uploader are read from the
    renderer where present. The video id is used as both 'id' and 'url'.
    """
    def text_at(getter):
        # String found by `getter` in the renderer, as resolved by try_get
        return try_get(renderer, getter, compat_str)

    video_id = renderer.get('videoId')
    title = text_at((
        lambda x: x['title']['runs'][0]['text'],
        lambda x: x['title']['simpleText']))
    description = text_at(
        lambda x: x['descriptionSnippet']['runs'][0]['text'])
    length_text = text_at(lambda x: x['lengthText']['simpleText'])
    duration = parse_duration(length_text)

    view_count_text = text_at(
        lambda x: x['viewCountText']['simpleText']) or ''
    # Whitespace is removed first so the leading digit/comma run is contiguous
    compact_count_text = re.sub(r'\s', '', view_count_text)
    leading_digits = self._search_regex(
        r'^([\d,]+)', compact_count_text, 'view count', default=None)
    view_count = str_to_int(leading_digits)

    uploader = text_at(lambda x: x['ownerText']['runs'][0]['text'])

    info = {
        '_type': 'url_transparent',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
    }
    return info
339
0c148415 340
360e1ca5 341class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 342 IE_DESC = 'YouTube.com'
cb7dfeea 343 _VALID_URL = r"""(?x)^
c5e8d7af 344 (
edb53e2d 345 (?:https?://|//) # http(s):// or protocol-independent URL
66b48727 346 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
484aaeb2 347 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 348 (?:www\.)?pwnyoutube\.com/|
8b561bfc 349 (?:www\.)?hooktube\.com/|
f7000f3a 350 (?:www\.)?yourepeat\.com/|
e69ae5b9 351 tube\.majestyc\.net/|
ba036333 352 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
c86d5023 353 (?:www\.)?invidious\.pussthecat\.org/|
354 (?:www\.)?invidious\.048596\.xyz/|
355 (?:www\.)?invidious\.zee\.li/|
356 (?:www\.)?vid\.puffyan\.us/|
357 (?:(?:www|au)\.)?ytprivate\.com/|
358 (?:www\.)?invidious\.namazso\.eu/|
359 (?:www\.)?invidious\.ethibox\.fr/|
360 (?:www\.)?inv\.skyn3t\.in/|
361 (?:www\.)?invidious\.himiko\.cloud/|
362 (?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion/|
363 (?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion/|
364 (?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion/|
365 (?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion/|
77d95677 366 (?:(?:www|dev)\.)?invidio\.us/|
ba036333 367 (?:(?:www|no)\.)?invidiou\.sh/|
29f7c58a 368 (?:(?:www|fi)\.)?invidious\.snopyta\.org/|
8ae113ca 369 (?:www\.)?invidious\.kabi\.tk/|
ba036333 370 (?:www\.)?invidious\.13ad\.de/|
791d2e81 371 (?:www\.)?invidious\.mastodon\.host/|
29f7c58a 372 (?:www\.)?invidious\.zapashcanon\.fr/|
373 (?:www\.)?invidious\.kavin\.rocks/|
374 (?:www\.)?invidious\.tube/|
375 (?:www\.)?invidiou\.site/|
376 (?:www\.)?invidious\.site/|
377 (?:www\.)?invidious\.xyz/|
494d664e 378 (?:www\.)?invidious\.nixnet\.xyz/|
666d808e 379 (?:www\.)?invidious\.drycat\.fr/|
ba036333 380 (?:www\.)?tube\.poal\.co/|
29f7c58a 381 (?:www\.)?tube\.connect\.cafe/|
8ae113ca 382 (?:www\.)?vid\.wxzm\.sx/|
29f7c58a 383 (?:www\.)?vid\.mint\.lgbt/|
384bf91f 384 (?:www\.)?yewtu\.be/|
494d664e 385 (?:www\.)?yt\.elukerio\.org/|
894b3826 386 (?:www\.)?yt\.lelux\.fi/|
1db5ab6b 387 (?:www\.)?invidious\.ggc-project\.de/|
388 (?:www\.)?yt\.maisputain\.ovh/|
1db5ab6b 389 (?:www\.)?invidious\.toot\.koeln/|
390 (?:www\.)?invidious\.fdn\.fr/|
391 (?:www\.)?watch\.nettohikari\.com/|
bff90fc5 392 (?:www\.)?kgg2m7yk5aybusll\.onion/|
393 (?:www\.)?qklhadlycap4cnod\.onion/|
394 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
395 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
396 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
397 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
33c1c7d8 398 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
1db5ab6b 399 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
e69ae5b9 400 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
401 (?:.*?\#/)? # handle anchor (#/) redirect urls
402 (?: # the various things that can precede the ID:
ac7553d0 403 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 404 |(?: # or the v= param in all its forms
f7000f3a 405 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 406 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 407 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
408 v=
409 )
f4b05232 410 ))
cbaed4bb
S
411 |(?:
412 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
413 vid\.plus| # or vid.plus/xxxx
414 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 415 )/
edb53e2d 416 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 417 )
c5e8d7af 418 )? # all until now is optional -> you can pass the naked ID
8bdd16b4 419 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
420 (?!.*?\blist=
421 (?:
422 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
423 WL # WL are handled by the watch later IE
424 )
425 )
c5e8d7af 426 (?(1).+)? # if we found the ID, everything can follow
d0ba5587 427 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
e40c758c 428 _PLAYER_INFO_RE = (
cc2db878 429 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
430 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 431 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 432 )
2c62dc26 433 _formats = {
c2d3cb4c 434 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
435 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
436 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
437 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
438 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
439 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
440 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
441 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 442 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 443 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
444 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
445 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
446 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
447 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
448 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 449 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 450 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
451 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 452
453
454 # 3D videos
c2d3cb4c 455 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
456 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
457 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
458 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 459 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
460 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
461 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 462
96fb5605 463 # Apple HTTP Live Streaming
11f12195 464 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 465 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
466 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
467 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
468 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
469 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 470 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
471 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
472
473 # DASH mp4 video
d23028a8
S
474 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
475 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
476 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
477 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
478 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 479 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
480 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
481 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
482 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
483 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
484 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
485 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 486
f6f1fc92 487 # Dash mp4 audio
d23028a8
S
488 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
489 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
490 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
491 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
492 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
493 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
494 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
495
496 # Dash webm
d23028a8
S
497 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
498 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
499 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
500 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
501 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
502 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
503 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
504 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
505 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
506 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
507 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
508 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
509 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
510 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
511 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 512 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
513 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
514 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
515 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
516 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
517 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
518 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
519
520 # Dash webm audio
d23028a8
S
521 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
522 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 523
0857baad 524 # Dash webm audio with opus inside
d23028a8
S
525 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
526 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
527 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 528
ce6b9a2d
PH
529 # RTMP (unnamed)
530 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
531
532 # av01 video only formats sometimes served with "unknown" codecs
533 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
534 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
535 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
536 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 537 }
29f7c58a 538 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 539
fd5c4aab
S
540 _GEO_BYPASS = False
541
78caa52a 542 IE_NAME = 'youtube'
2eb88d95
PH
543 _TESTS = [
544 {
2d3d2997 545 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
546 'info_dict': {
547 'id': 'BaW_jenozKc',
548 'ext': 'mp4',
3867038a 549 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
550 'uploader': 'Philipp Hagemeister',
551 'uploader_id': 'phihag',
ec85ded8 552 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
553 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
554 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 555 'upload_date': '20121002',
3867038a 556 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 557 'categories': ['Science & Technology'],
3867038a 558 'tags': ['youtube-dl'],
556dbe7f 559 'duration': 10,
dbdaaa23 560 'view_count': int,
3e7c1224
PH
561 'like_count': int,
562 'dislike_count': int,
7c80519c 563 'start_time': 1,
297a564b 564 'end_time': 9,
2eb88d95 565 }
0e853ca4 566 },
fccd3771 567 {
4bc3a23e
PH
568 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
569 'note': 'Embed-only video (#1746)',
570 'info_dict': {
571 'id': 'yZIXLfi8CZQ',
572 'ext': 'mp4',
573 'upload_date': '20120608',
574 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
575 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
576 'uploader': 'SET India',
94bfcd23 577 'uploader_id': 'setindia',
ec85ded8 578 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 579 'age_limit': 18,
545cc85d 580 },
581 'skip': 'Private video',
fccd3771 582 },
11b56058 583 {
8bdd16b4 584 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
585 'note': 'Use the first video ID in the URL',
586 'info_dict': {
587 'id': 'BaW_jenozKc',
588 'ext': 'mp4',
3867038a 589 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
590 'uploader': 'Philipp Hagemeister',
591 'uploader_id': 'phihag',
ec85ded8 592 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 593 'upload_date': '20121002',
3867038a 594 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 595 'categories': ['Science & Technology'],
3867038a 596 'tags': ['youtube-dl'],
556dbe7f 597 'duration': 10,
dbdaaa23 598 'view_count': int,
11b56058
PM
599 'like_count': int,
600 'dislike_count': int,
34a7de29
S
601 },
602 'params': {
603 'skip_download': True,
604 },
11b56058 605 },
dd27fd17 606 {
2d3d2997 607 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
608 'note': '256k DASH audio (format 141) via DASH manifest',
609 'info_dict': {
610 'id': 'a9LDPn-MO4I',
611 'ext': 'm4a',
612 'upload_date': '20121002',
613 'uploader_id': '8KVIDEO',
ec85ded8 614 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
615 'description': '',
616 'uploader': '8KVIDEO',
617 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 618 },
4bc3a23e
PH
619 'params': {
620 'youtube_include_dash_manifest': True,
621 'format': '141',
4919603f 622 },
de3c7fe0 623 'skip': 'format 141 not served anymore',
dd27fd17 624 },
8bdd16b4 625 # DASH manifest with encrypted signature
626 {
627 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
628 'info_dict': {
629 'id': 'IB3lcPjvWLA',
630 'ext': 'm4a',
631 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
632 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
633 'duration': 244,
634 'uploader': 'AfrojackVEVO',
635 'uploader_id': 'AfrojackVEVO',
636 'upload_date': '20131011',
cc2db878 637 'abr': 129.495,
8bdd16b4 638 },
639 'params': {
640 'youtube_include_dash_manifest': True,
641 'format': '141/bestaudio[ext=m4a]',
642 },
643 },
aa79ac0c
PH
644 # Controversy video
645 {
646 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
647 'info_dict': {
648 'id': 'T4XJQO3qol8',
649 'ext': 'mp4',
556dbe7f 650 'duration': 219,
aa79ac0c 651 'upload_date': '20100909',
4fe54c12 652 'uploader': 'Amazing Atheist',
aa79ac0c 653 'uploader_id': 'TheAmazingAtheist',
ec85ded8 654 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 655 'title': 'Burning Everyone\'s Koran',
545cc85d 656 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 657 }
c522adb1 658 },
dd2d55f1 659 # Normal age-gate video (embed allowed)
c522adb1 660 {
2d3d2997 661 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
662 'info_dict': {
663 'id': 'HtVdAasjOgU',
664 'ext': 'mp4',
665 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 666 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 667 'duration': 142,
c522adb1
JMF
668 'uploader': 'The Witcher',
669 'uploader_id': 'WitcherGame',
ec85ded8 670 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 671 'upload_date': '20140605',
34952f09 672 'age_limit': 18,
c522adb1
JMF
673 },
674 },
8bdd16b4 675 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
676 # YouTube Red ad is not captured for creator
677 {
678 'url': '__2ABJjxzNo',
679 'info_dict': {
680 'id': '__2ABJjxzNo',
681 'ext': 'mp4',
682 'duration': 266,
683 'upload_date': '20100430',
684 'uploader_id': 'deadmau5',
685 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 686 'creator': 'deadmau5',
687 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 688 'uploader': 'deadmau5',
689 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 690 'alt_title': 'Some Chords',
8bdd16b4 691 },
692 'expected_warnings': [
693 'DASH manifest missing',
694 ]
695 },
067aa17e 696 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
697 {
698 'url': 'lqQg6PlCWgI',
699 'info_dict': {
700 'id': 'lqQg6PlCWgI',
701 'ext': 'mp4',
556dbe7f 702 'duration': 6085,
90227264 703 'upload_date': '20150827',
cbe2bd91 704 'uploader_id': 'olympic',
ec85ded8 705 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 706 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 707 'uploader': 'Olympic',
cbe2bd91
PH
708 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
709 },
710 'params': {
711 'skip_download': 'requires avconv',
e52a40ab 712 }
cbe2bd91 713 },
6271f1ca
PH
714 # Non-square pixels
715 {
716 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
717 'info_dict': {
718 'id': '_b-2C3KPAM0',
719 'ext': 'mp4',
720 'stretched_ratio': 16 / 9.,
556dbe7f 721 'duration': 85,
6271f1ca
PH
722 'upload_date': '20110310',
723 'uploader_id': 'AllenMeow',
ec85ded8 724 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 725 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 726 'uploader': '孫ᄋᄅ',
6271f1ca
PH
727 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
728 },
06b491eb
S
729 },
730 # url_encoded_fmt_stream_map is empty string
731 {
732 'url': 'qEJwOuvDf7I',
733 'info_dict': {
734 'id': 'qEJwOuvDf7I',
f57b7835 735 'ext': 'webm',
06b491eb
S
736 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
737 'description': '',
738 'upload_date': '20150404',
739 'uploader_id': 'spbelect',
740 'uploader': 'Наблюдатели Петербурга',
741 },
742 'params': {
743 'skip_download': 'requires avconv',
e323cf3f
S
744 },
745 'skip': 'This live event has ended.',
06b491eb 746 },
067aa17e 747 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
748 {
749 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
750 'info_dict': {
751 'id': 'FIl7x6_3R5Y',
eb6793ba 752 'ext': 'webm',
da77d856
S
753 'title': 'md5:7b81415841e02ecd4313668cde88737a',
754 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 755 'duration': 220,
da77d856
S
756 'upload_date': '20150625',
757 'uploader_id': 'dorappi2000',
ec85ded8 758 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 759 'uploader': 'dorappi2000',
eb6793ba 760 'formats': 'mincount:31',
da77d856 761 },
eb6793ba 762 'skip': 'not actual anymore',
2ee8f5d8 763 },
8a1a26ce
YCH
764 # DASH manifest with segment_list
765 {
766 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
767 'md5': '8ce563a1d667b599d21064e982ab9e31',
768 'info_dict': {
769 'id': 'CsmdDsKjzN8',
770 'ext': 'mp4',
17ee98e1 771 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
772 'uploader': 'Airtek',
773 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
774 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
775 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
776 },
777 'params': {
778 'youtube_include_dash_manifest': True,
779 'format': '135', # bestvideo
be49068d
S
780 },
781 'skip': 'This live event has ended.',
2ee8f5d8 782 },
cf7e015f
S
783 {
784 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 785 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 786 'info_dict': {
545cc85d 787 'id': 'jvGDaLqkpTg',
788 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
789 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
790 },
791 'playlist': [{
792 'info_dict': {
545cc85d 793 'id': 'jvGDaLqkpTg',
cf7e015f 794 'ext': 'mp4',
545cc85d 795 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
796 'description': 'md5:e03b909557865076822aa169218d6a5d',
797 'duration': 10643,
798 'upload_date': '20161111',
799 'uploader': 'Team PGP',
800 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
801 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
802 },
803 }, {
804 'info_dict': {
545cc85d 805 'id': '3AKt1R1aDnw',
cf7e015f 806 'ext': 'mp4',
545cc85d 807 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
808 'description': 'md5:e03b909557865076822aa169218d6a5d',
809 'duration': 10991,
810 'upload_date': '20161111',
811 'uploader': 'Team PGP',
812 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
813 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
814 },
815 }, {
816 'info_dict': {
545cc85d 817 'id': 'RtAMM00gpVc',
cf7e015f 818 'ext': 'mp4',
545cc85d 819 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
820 'description': 'md5:e03b909557865076822aa169218d6a5d',
821 'duration': 10995,
822 'upload_date': '20161111',
823 'uploader': 'Team PGP',
824 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
825 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
826 },
827 }, {
828 'info_dict': {
545cc85d 829 'id': '6N2fdlP3C5U',
cf7e015f 830 'ext': 'mp4',
545cc85d 831 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
832 'description': 'md5:e03b909557865076822aa169218d6a5d',
833 'duration': 10990,
834 'upload_date': '20161111',
835 'uploader': 'Team PGP',
836 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
837 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
838 },
839 }],
840 'params': {
841 'skip_download': True,
842 },
cbaed4bb 843 },
f9f49d87 844 {
067aa17e 845 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
846 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
847 'info_dict': {
848 'id': 'gVfLd0zydlo',
849 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
850 },
851 'playlist_count': 2,
be49068d 852 'skip': 'Not multifeed anymore',
f9f49d87 853 },
cbaed4bb 854 {
2d3d2997 855 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 856 'only_matching': True,
0e49d9a6 857 },
6d4fc66b 858 {
2d3d2997 859 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
860 'only_matching': True,
861 },
0e49d9a6 862 {
067aa17e 863 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 864 # Also tests cut-off URL expansion in video description (see
067aa17e
S
865 # https://github.com/ytdl-org/youtube-dl/issues/1892,
866 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
867 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
868 'info_dict': {
869 'id': 'lsguqyKfVQg',
870 'ext': 'mp4',
871 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 872 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 873 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 874 'duration': 133,
0e49d9a6
LL
875 'upload_date': '20151119',
876 'uploader_id': 'IronSoulElf',
ec85ded8 877 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 878 'uploader': 'IronSoulElf',
eb6793ba
S
879 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
880 'track': 'Dark Walk - Position Music',
881 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 882 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
883 },
884 'params': {
885 'skip_download': True,
886 },
887 },
61f92af1 888 {
067aa17e 889 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
890 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
891 'only_matching': True,
892 },
313dfc45
LL
893 {
894 # Video with yt:stretch=17:0
895 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
896 'info_dict': {
897 'id': 'Q39EVAstoRM',
898 'ext': 'mp4',
899 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
900 'description': 'md5:ee18a25c350637c8faff806845bddee9',
901 'upload_date': '20151107',
902 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
903 'uploader': 'CH GAMER DROID',
904 },
905 'params': {
906 'skip_download': True,
907 },
be49068d 908 'skip': 'This video does not exist.',
313dfc45 909 },
7caf9830
S
910 {
911 # Video licensed under Creative Commons
912 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
913 'info_dict': {
914 'id': 'M4gD1WSo5mA',
915 'ext': 'mp4',
916 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
917 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 918 'duration': 721,
7caf9830
S
919 'upload_date': '20150127',
920 'uploader_id': 'BerkmanCenter',
ec85ded8 921 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 922 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
923 'license': 'Creative Commons Attribution license (reuse allowed)',
924 },
925 'params': {
926 'skip_download': True,
927 },
928 },
fd050249
S
929 {
930 # Channel-like uploader_url
931 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
932 'info_dict': {
933 'id': 'eQcmzGIKrzg',
934 'ext': 'mp4',
935 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 936 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 937 'duration': 4060,
fd050249 938 'upload_date': '20151119',
eb6793ba 939 'uploader': 'Bernie Sanders',
fd050249 940 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 941 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
942 'license': 'Creative Commons Attribution license (reuse allowed)',
943 },
944 'params': {
945 'skip_download': True,
946 },
947 },
040ac686
S
948 {
949 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
950 'only_matching': True,
7f29cf54
S
951 },
952 {
067aa17e 953 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
954 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
955 'only_matching': True,
6496ccb4
S
956 },
957 {
958 # Rental video preview
959 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
960 'info_dict': {
961 'id': 'uGpuVWrhIzE',
962 'ext': 'mp4',
963 'title': 'Piku - Trailer',
964 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
965 'upload_date': '20150811',
966 'uploader': 'FlixMatrix',
967 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 968 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
969 'license': 'Standard YouTube License',
970 },
971 'params': {
972 'skip_download': True,
973 },
eb6793ba 974 'skip': 'This video is not available.',
022a5d66 975 },
12afdc2a
S
976 {
977 # YouTube Red video with episode data
978 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
979 'info_dict': {
980 'id': 'iqKdEhx-dD4',
981 'ext': 'mp4',
982 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 983 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 984 'duration': 2085,
12afdc2a
S
985 'upload_date': '20170118',
986 'uploader': 'Vsauce',
987 'uploader_id': 'Vsauce',
988 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
989 'series': 'Mind Field',
990 'season_number': 1,
991 'episode_number': 1,
992 },
993 'params': {
994 'skip_download': True,
995 },
996 'expected_warnings': [
997 'Skipping DASH manifest',
998 ],
999 },
c7121fa7
S
1000 {
1001 # The following content has been identified by the YouTube community
1002 # as inappropriate or offensive to some audiences.
1003 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1004 'info_dict': {
1005 'id': '6SJNVb0GnPI',
1006 'ext': 'mp4',
1007 'title': 'Race Differences in Intelligence',
1008 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1009 'duration': 965,
1010 'upload_date': '20140124',
1011 'uploader': 'New Century Foundation',
1012 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1013 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1014 },
1015 'params': {
1016 'skip_download': True,
1017 },
545cc85d 1018 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1019 },
022a5d66
S
1020 {
1021 # itag 212
1022 'url': '1t24XAntNCY',
1023 'only_matching': True,
fd5c4aab
S
1024 },
1025 {
1026 # geo restricted to JP
1027 'url': 'sJL6WA-aGkQ',
1028 'only_matching': True,
1029 },
cd5a74a2
S
1030 {
1031 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1032 'only_matching': True,
1033 },
825cd268
RA
1034 {
1035 # DRM protected
1036 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1037 'only_matching': True,
4fe54c12
S
1038 },
1039 {
1040 # Video with unsupported adaptive stream type formats
1041 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1042 'info_dict': {
1043 'id': 'Z4Vy8R84T1U',
1044 'ext': 'mp4',
1045 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1046 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1047 'duration': 433,
1048 'upload_date': '20130923',
1049 'uploader': 'Amelia Putri Harwita',
1050 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1051 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1052 'formats': 'maxcount:10',
1053 },
1054 'params': {
1055 'skip_download': True,
1056 'youtube_include_dash_manifest': False,
1057 },
5429d6a9 1058 'skip': 'not actual anymore',
5caabd3c 1059 },
1060 {
822b9d9c 1061 # Youtube Music Auto-generated description
5caabd3c 1062 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1063 'info_dict': {
1064 'id': 'MgNrAu2pzNs',
1065 'ext': 'mp4',
1066 'title': 'Voyeur Girl',
1067 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1068 'upload_date': '20190312',
5429d6a9
S
1069 'uploader': 'Stephen - Topic',
1070 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1071 'artist': 'Stephen',
1072 'track': 'Voyeur Girl',
1073 'album': 'it\'s too much love to know my dear',
1074 'release_date': '20190313',
1075 'release_year': 2019,
1076 },
1077 'params': {
1078 'skip_download': True,
1079 },
1080 },
66b48727
RA
1081 {
1082 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1083 'only_matching': True,
1084 },
011e75e6
S
1085 {
1086 # invalid -> valid video id redirection
1087 'url': 'DJztXj2GPfl',
1088 'info_dict': {
1089 'id': 'DJztXj2GPfk',
1090 'ext': 'mp4',
1091 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1092 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1093 'upload_date': '20090125',
1094 'uploader': 'Prochorowka',
1095 'uploader_id': 'Prochorowka',
1096 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1097 'artist': 'Panjabi MC',
1098 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1099 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1100 },
1101 'params': {
1102 'skip_download': True,
1103 },
545cc85d 1104 'skip': 'Video unavailable',
ea74e00b
DP
1105 },
1106 {
1107 # empty description results in an empty string
1108 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1109 'info_dict': {
1110 'id': 'x41yOUIvK2k',
1111 'ext': 'mp4',
1112 'title': 'IMG 3456',
1113 'description': '',
1114 'upload_date': '20170613',
1115 'uploader_id': 'ElevageOrVert',
1116 'uploader': 'ElevageOrVert',
1117 },
1118 'params': {
1119 'skip_download': True,
1120 },
1121 },
a0566bbf 1122 {
29f7c58a 1123 # with '};' inside yt initial data (see [1])
1124 # see [2] for an example with '};' inside ytInitialPlayerResponse
1125 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1126 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1127 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1128 'info_dict': {
1129 'id': 'CHqg6qOn4no',
1130 'ext': 'mp4',
1131 'title': 'Part 77 Sort a list of simple types in c#',
1132 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1133 'upload_date': '20130831',
1134 'uploader_id': 'kudvenkat',
1135 'uploader': 'kudvenkat',
1136 },
1137 'params': {
1138 'skip_download': True,
1139 },
1140 },
29f7c58a 1141 {
1142 # another example of '};' in ytInitialData
1143 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1144 'only_matching': True,
1145 },
1146 {
1147 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1148 'only_matching': True,
1149 },
545cc85d 1150 {
cc2db878 1151 # https://github.com/ytdl-org/youtube-dl/pull/28094
1152 'url': 'OtqTfy26tG0',
1153 'info_dict': {
1154 'id': 'OtqTfy26tG0',
1155 'ext': 'mp4',
1156 'title': 'Burn Out',
1157 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1158 'upload_date': '20141120',
1159 'uploader': 'The Cinematic Orchestra - Topic',
1160 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1161 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1162 'artist': 'The Cinematic Orchestra',
1163 'track': 'Burn Out',
1164 'album': 'Every Day',
1165 'release_data': None,
1166 'release_year': None,
1167 },
1168 'params': {
1169 'skip_download': True,
1170 },
545cc85d 1171 },
2eb88d95
PH
1172 ]
1173
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Set up the extractor together with its two per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Player JS source, keyed by player id (filled by _extract_signature_function).
    self._code_cache = dict()
    # Signature functions, keyed by (player_url, signature layout id)
    # (filled by _decrypt_signature).
    self._player_cache = dict()
e0df6211 1178
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return a string describing the layout of a signature.

    The result is the length of every dot-separated part of
    *example_sig*, joined by dots again.
    """
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1182
e40c758c
S
1183 @classmethod
1184 def _extract_player_info(cls, player_url):
1185 for player_re in cls._PLAYER_INFO_RE:
1186 id_m = re.search(player_re, player_url)
1187 if id_m:
1188 break
1189 else:
c081b35c 1190 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1191 return id_m.group('id')
e40c758c
S
1192
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that decrypts signatures laid out like *example_sig*.

    A previously stored index permutation is looked up in the downloader
    cache first.  On a miss the player code is downloaded (at most once
    per player id for this instance), the signature function is parsed
    out of it, and the permutation it applies is stored in the cache.

    video_id is only passed on to the download call.
    Raises ExtractorError if the computed cache id contains a path
    component.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (
        player_id, self._signature_cache_id(example_sig))
    # The id is handed to the cache as a key, so it must be a bare name.
    # This is an explicit raise instead of an assert so the check is not
    # compiled away when Python runs with -O.
    if os.path.basename(func_id) != func_id:
        raise ExtractorError(
            'Invalid signature function cache id %r' % func_id)

    cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        # cache_spec lists, for each output position, the input index to take.
        return lambda s: ''.join(s[i] for i in cache_spec)

    if player_id not in self._code_cache:
        self._code_cache[player_id] = self._download_webpage(
            player_url, video_id,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
    code = self._code_cache[player_id]
    res = self._parse_sig_js(code)

    # Feed a string whose i-th character has code point i through the
    # function; the code points of the output are then the permutation.
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = res(test_string)
    cache_spec = [ord(c) for c in cache_res]

    self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    return res
1219
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function *func*.

    *func* is probed with a string whose i-th character has code point
    i, the resulting index sequence is compressed into indexing/slicing
    expressions on ``s``, and the snippet is shown via ``to_screen``.
    """
    def gen_sig_code(idxs):
        def _slice_expr(first, last, stride):
            first_part = '' if first == 0 else str(first)
            stop = last + stride
            last_part = (':%d' % stop) if stop >= 0 else ':'
            stride_part = '' if stride == 1 else (':%d' % stride)
            return 's[%s%s%s]' % (first_part, last_part, stride_part)

        run_step = None
        # Squelch pyflakes warnings - run_start is always set together with run_step
        run_start = '(Never used)'
        for cur, prev in zip(idxs[1:], idxs[:-1]):
            delta = cur - prev
            if run_step is not None:
                # Inside a run: either it goes on, or it ends at prev.
                if delta != run_step:
                    yield _slice_expr(run_start, prev, run_step)
                    run_step = None
                continue
            if delta in [-1, 1]:
                # A new run of consecutive indices starts at prev.
                run_step = delta
                run_start = prev
            else:
                yield 's[%d]' % prev
        # Flush whatever is pending for the final index.
        if run_step is None:
            yield 's[%d]' % cur
        else:
            yield _slice_expr(run_start, cur, run_step)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    permutation = [ord(ch) for ch in func(probe)]
    expr_code = ' + '.join(gen_sig_code(permutation))
    part_lengths = ', '.join(
        compat_str(len(part)) for part in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1258
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a Python callable from the signature function in *jscode*.

    The name of the function is located with the first pattern below
    that matches (the ``sig`` group), the function is extracted with
    JSInterpreter, and a wrapper taking the signature string as its
    only argument is returned.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # This pattern used to be listed twice in a row; the second copy
         # could never match once the first had failed, so it was dropped.
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list.
    return lambda s: initial_function([s])
1282
545cc85d 1283 def _decrypt_signature(self, s, video_id, player_url):
257a2501 1284 """Turn the encrypted s field into a working signature"""
6b37f0be 1285
c8bf86d5 1286 if player_url is None:
69ea8ca4 1287 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1288
69ea8ca4 1289 if player_url.startswith('//'):
78caa52a 1290 player_url = 'https:' + player_url
3c90cc8b
S
1291 elif not re.match(r'https?://', player_url):
1292 player_url = compat_urlparse.urljoin(
1293 'https://www.youtube.com', player_url)
c8bf86d5 1294 try:
62af3a0e 1295 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1296 if player_id not in self._player_cache:
1297 func = self._extract_signature_function(
60064c53 1298 video_id, player_url, s
c8bf86d5
PH
1299 )
1300 self._player_cache[player_id] = func
1301 func = self._player_cache[player_id]
1302 if self._downloader.params.get('youtube_print_sig_code'):
60064c53 1303 self._print_sig_code(func, s)
c8bf86d5
PH
1304 return func(s)
1305 except Exception as e:
1306 tb = traceback.format_exc()
1307 raise ExtractorError(
78caa52a 1308 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1309
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL found in *player_response*.

    Nothing happens when the response carries no valid
    ``playbackTracking.videostatsPlaybackUrl.baseUrl``.  Otherwise the
    URL is fetched with ``ver`` and a freshly generated ``cpn`` set in
    its query string; the download is non-fatal.
    """
    tracking_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not tracking_url:
        return

    url_parts = compat_urlparse.urlparse(tracking_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [
        CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]
    new_query = compat_urllib_parse_urlencode(query, True)
    tracking_url = compat_urlparse.urlunparse(
        url_parts._replace(query=new_query))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1334
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return every YouTube embed found in the HTML string *webpage*.

    The list holds, in this order: embedded player URLs, ids from
    lazyYT ``data-youtube-id`` attributes, and ids from the Wordpress
    "YouTube Video Importer" plugin markup.  The first two kinds are
    passed through unescapeHTML; the last is returned as matched.
    """
    # Embedded YouTube player
    # The embedSWF alternative used to read `embedSWF\(?:\s*`, which demands
    # a literal ':' before the quoted URL and so never matched a plain
    # embedSWF("...") call.  The ':' is optional now; everything the old
    # pattern accepted is still accepted.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:?\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(video_id)
        for video_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall yields (q1, q2, id) tuples; only the id is wanted.
    entries.extend(m[-1] for m in matches)

    return entries
1366
@staticmethod
def _extract_url(webpage):
    """Return the first entry of YoutubeIE._extract_urls(webpage), or None if it is empty."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1371
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id contained in *url*.

    *url* is matched against ``cls._VALID_URL`` in verbose mode and the
    second capture group is returned.

    Raises ExtractorError when the URL does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1379
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a chapter list from the player-bar section of *data*.

    Returns a list of dicts with ``start_time``, ``end_time`` and
    ``title``, or None when *data* holds no non-empty chapter list.
    A chapter ends where the following one starts; the last chapter
    ends at *duration*.  Chapters whose start or end time cannot be
    determined are left out.  video_id is not used.
    """
    def find_chapters(root):
        overlay = root['playerOverlays']['playerOverlayRenderer']
        player_bar = (
            overlay['decoratedPlayerBarRenderer']
            ['decoratedPlayerBarRenderer']['playerBar'])
        return player_bar['chapteredPlayerBarRenderer']['chapters']

    def chapter_time(chapter):
        # timeRangeStartMillis is divided by 1000 via scale.
        millis = try_get(
            chapter,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    chapters_list = try_get(data, find_chapters, list)
    if not chapters_list:
        return

    total = len(chapters_list)
    chapters = []
    for index, chapter in enumerate(chapters_list):
        start_time = chapter_time(chapter)
        if start_time is None:
            continue
        if index + 1 < total:
            end_time = chapter_time(chapters_list[index + 1])
        else:
            end_time = duration
        if end_time is None:
            continue
        title = try_get(
            chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
            compat_str)
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': title,
        })
    return chapters
1419
545cc85d 1420 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1421 return self._parse_json(self._search_regex(
1422 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1423 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 1424
c5e8d7af 1425 def _real_extract(self, url):
cf7e015f 1426 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1427 video_id = self._match_id(url)
1428 base_url = self.http_scheme() + '//www.youtube.com/'
a718ef84 1429 webpage_url = base_url + 'watch?v=' + video_id + '&has_verified=1'
545cc85d 1430 webpage = self._download_webpage(webpage_url, video_id, fatal=False)
1431
1432 player_response = None
1433 if webpage:
1434 player_response = self._extract_yt_initial_variable(
1435 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1436 video_id, 'initial player response')
1437 if not player_response:
1438 player_response = self._call_api(
1439 'player', {'videoId': video_id}, video_id)
1440
1441 playability_status = player_response.get('playabilityStatus') or {}
1442 if playability_status.get('reason') == 'Sign in to confirm your age':
1443 pr = self._parse_json(try_get(compat_parse_qs(
1444 self._download_webpage(
1445 base_url + 'get_video_info', video_id,
1446 'Refetching age-gated info webpage',
1447 'unable to download video info webpage', query={
1448 'video_id': video_id,
1449 'eurl': 'https://www.youtube.com/embed/' + video_id,
1450 }, fatal=False)),
1451 lambda x: x['player_response'][0],
1452 compat_str) or '{}', video_id)
1453 if pr:
1454 player_response = pr
1455
1456 trailer_video_id = try_get(
1457 playability_status,
1458 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1459 compat_str)
1460 if trailer_video_id:
1461 return self.url_result(
1462 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1463
545cc85d 1464 def get_text(x):
1465 if not x:
c2d125d9 1466 return
545cc85d 1467 return x.get('simpleText') or ''.join([r['text'] for r in x['runs']])
15be3eb5 1468
545cc85d 1469 search_meta = (
1470 lambda x: self._html_search_meta(x, webpage, default=None)) \
1471 if webpage else lambda x: None
dbdaaa23 1472
545cc85d 1473 video_details = player_response.get('videoDetails') or {}
37357d21 1474 microformat = try_get(
545cc85d 1475 player_response,
1476 lambda x: x['microformat']['playerMicroformatRenderer'],
1477 dict) or {}
1478 video_title = video_details.get('title') \
1479 or get_text(microformat.get('title')) \
1480 or search_meta(['og:title', 'twitter:title', 'title'])
1481 video_description = video_details.get('shortDescription')
cf7e015f 1482
8fe10494 1483 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1484 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1485 multifeed_metadata_list = try_get(
1486 player_response,
1487 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1488 compat_str)
8fe10494
S
1489 if multifeed_metadata_list:
1490 entries = []
1491 feed_ids = []
1492 for feed in multifeed_metadata_list.split(','):
1493 # Unquote should take place before split on comma (,) since textual
1494 # fields may contain comma as well (see
067aa17e 1495 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1496 feed_data = compat_parse_qs(
1497 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1498
1499 def feed_entry(name):
545cc85d 1500 return try_get(
1501 feed_data, lambda x: x[name][0], compat_str)
6b09401b
S
1502
1503 feed_id = feed_entry('id')
1504 if not feed_id:
1505 continue
1506 feed_title = feed_entry('title')
1507 title = video_title
1508 if feed_title:
1509 title += ' (%s)' % feed_title
8fe10494
S
1510 entries.append({
1511 '_type': 'url_transparent',
1512 'ie_key': 'Youtube',
1513 'url': smuggle_url(
545cc85d 1514 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 1515 {'force_singlefeed': True}),
6b09401b 1516 'title': title,
8fe10494 1517 })
6b09401b 1518 feed_ids.append(feed_id)
8fe10494
S
1519 self.to_screen(
1520 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1521 % (', '.join(feed_ids), video_id))
545cc85d 1522 return self.playlist_result(
1523 entries, video_id, video_title, video_description)
8fe10494
S
1524 else:
1525 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1526
545cc85d 1527 formats = []
1528 itags = []
cc2db878 1529 itag_qualities = {}
545cc85d 1530 player_url = None
dca3ff4a 1531 q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
545cc85d 1532 streaming_data = player_response.get('streamingData') or {}
1533 streaming_formats = streaming_data.get('formats') or []
1534 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
1535 for fmt in streaming_formats:
1536 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
1537 continue
321bf820 1538
cc2db878 1539 itag = str_or_none(fmt.get('itag'))
1540 quality = fmt.get('quality')
1541 if itag and quality:
1542 itag_qualities[itag] = quality
1543 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
1544 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
1545 # number of fragments that would subsequently be requested (with `&sq=N`)
1546 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
1547 continue
1548
545cc85d 1549 fmt_url = fmt.get('url')
1550 if not fmt_url:
1551 sc = compat_parse_qs(fmt.get('signatureCipher'))
1552 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
1553 encrypted_sig = try_get(sc, lambda x: x['s'][0])
1554 if not (sc and fmt_url and encrypted_sig):
1555 continue
1556 if not player_url:
1557 if not webpage:
1558 continue
1559 player_url = self._search_regex(
1560 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1561 webpage, 'player URL', fatal=False)
1562 if not player_url:
201e9eaa 1563 continue
545cc85d 1564 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
1565 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
1566 fmt_url += '&' + sp + '=' + signature
1567
545cc85d 1568 if itag:
1569 itags.append(itag)
cc2db878 1570 tbr = float_or_none(
1571 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 1572 dct = {
1573 'asr': int_or_none(fmt.get('audioSampleRate')),
1574 'filesize': int_or_none(fmt.get('contentLength')),
1575 'format_id': itag,
1576 'format_note': fmt.get('qualityLabel') or quality,
1577 'fps': int_or_none(fmt.get('fps')),
1578 'height': int_or_none(fmt.get('height')),
dca3ff4a 1579 'quality': q(quality),
cc2db878 1580 'tbr': tbr,
545cc85d 1581 'url': fmt_url,
1582 'width': fmt.get('width'),
1583 }
1584 mimetype = fmt.get('mimeType')
1585 if mimetype:
1586 mobj = re.match(
1587 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
1588 if mobj:
1589 dct['ext'] = mimetype2ext(mobj.group(1))
1590 dct.update(parse_codecs(mobj.group(2)))
cc2db878 1591 no_audio = dct.get('acodec') == 'none'
1592 no_video = dct.get('vcodec') == 'none'
1593 if no_audio:
1594 dct['vbr'] = tbr
1595 if no_video:
1596 dct['abr'] = tbr
1597 if no_audio or no_video:
545cc85d 1598 dct['downloader_options'] = {
1599 # Youtube throttles chunks >~10M
1600 'http_chunk_size': 10485760,
bf1317d2 1601 }
545cc85d 1602 formats.append(dct)
1603
1604 hls_manifest_url = streaming_data.get('hlsManifestUrl')
1605 if hls_manifest_url:
1606 for f in self._extract_m3u8_formats(
1607 hls_manifest_url, video_id, 'mp4', fatal=False):
1608 itag = self._search_regex(
1609 r'/itag/(\d+)', f['url'], 'itag', default=None)
1610 if itag:
1611 f['format_id'] = itag
1612 formats.append(f)
1613
1614 if self._downloader.params.get('youtube_include_dash_manifest'):
1615 dash_manifest_url = streaming_data.get('dashManifestUrl')
1616 if dash_manifest_url:
545cc85d 1617 for f in self._extract_mpd_formats(
1618 dash_manifest_url, video_id, fatal=False):
cc2db878 1619 itag = f['format_id']
1620 if itag in itags:
1621 continue
dca3ff4a 1622 if itag in itag_qualities:
1623 # Not actually useful since the sorting is already done with "quality,res,fps,codec"
1624 # but kept to maintain feature parity (and code similarity) with youtube-dl
1625 # Remove if this causes any issues with sorting in future
1626 f['quality'] = q(itag_qualities[itag])
545cc85d 1627 filesize = int_or_none(self._search_regex(
1628 r'/clen/(\d+)', f.get('fragment_base_url')
1629 or f['url'], 'file size', default=None))
1630 if filesize:
1631 f['filesize'] = filesize
cc2db878 1632 formats.append(f)
bf1317d2 1633
545cc85d 1634 if not formats:
63ad4d43 1635 if not self._downloader.params.get('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
545cc85d 1636 raise ExtractorError(
1637 'This video is DRM protected.', expected=True)
1638 pemr = try_get(
1639 playability_status,
1640 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
1641 dict) or {}
1642 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
1643 subreason = pemr.get('subreason')
1644 if subreason:
1645 subreason = clean_html(get_text(subreason))
1646 if subreason == 'The uploader has not made this video available in your country.':
1647 countries = microformat.get('availableCountries')
1648 if not countries:
1649 regions_allowed = search_meta('regionsAllowed')
1650 countries = regions_allowed.split(',') if regions_allowed else None
1651 self.raise_geo_restricted(
1652 subreason, countries)
1653 reason += '\n' + subreason
1654 if reason:
1655 raise ExtractorError(reason, expected=True)
bf1317d2 1656
545cc85d 1657 self._sort_formats(formats)
bf1317d2 1658
545cc85d 1659 keywords = video_details.get('keywords') or []
1660 if not keywords and webpage:
1661 keywords = [
1662 unescapeHTML(m.group('content'))
1663 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
1664 for keyword in keywords:
1665 if keyword.startswith('yt:stretch='):
1666 w, h = keyword.split('=')[1].split(':')
1667 w, h = int(w), int(h)
1668 if w > 0 and h > 0:
1669 ratio = w / h
1670 for f in formats:
1671 if f.get('vcodec') != 'none':
1672 f['stretched_ratio'] = ratio
6449cd80 1673
545cc85d 1674 thumbnails = []
1675 for container in (video_details, microformat):
1676 for thumbnail in (try_get(
1677 container,
1678 lambda x: x['thumbnail']['thumbnails'], list) or []):
1679 thumbnail_url = thumbnail.get('url')
1680 if not thumbnail_url:
bf1317d2 1681 continue
545cc85d 1682 thumbnails.append({
1683 'height': int_or_none(thumbnail.get('height')),
1684 'url': thumbnail_url,
1685 'width': int_or_none(thumbnail.get('width')),
1686 })
1687 if thumbnails:
1688 break
a6211d23 1689 else:
545cc85d 1690 thumbnail = search_meta(['og:image', 'twitter:image'])
1691 if thumbnail:
1692 thumbnails = [{'url': thumbnail}]
1693
1694 category = microformat.get('category') or search_meta('genre')
1695 channel_id = video_details.get('channelId') \
1696 or microformat.get('externalChannelId') \
1697 or search_meta('channelId')
1698 duration = int_or_none(
1699 video_details.get('lengthSeconds')
1700 or microformat.get('lengthSeconds')) \
1701 or parse_duration(search_meta('duration'))
1702 is_live = video_details.get('isLive')
1703 owner_profile_url = microformat.get('ownerProfileUrl')
1704
1705 info = {
1706 'id': video_id,
1707 'title': self._live_title(video_title) if is_live else video_title,
1708 'formats': formats,
1709 'thumbnails': thumbnails,
1710 'description': video_description,
1711 'upload_date': unified_strdate(
1712 microformat.get('uploadDate')
1713 or search_meta('uploadDate')),
1714 'uploader': video_details['author'],
1715 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
1716 'uploader_url': owner_profile_url,
1717 'channel_id': channel_id,
1718 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
1719 'duration': duration,
1720 'view_count': int_or_none(
1721 video_details.get('viewCount')
1722 or microformat.get('viewCount')
1723 or search_meta('interactionCount')),
1724 'average_rating': float_or_none(video_details.get('averageRating')),
1725 'age_limit': 18 if (
1726 microformat.get('isFamilySafe') is False
1727 or search_meta('isFamilyFriendly') == 'false'
1728 or search_meta('og:restrictions:age') == '18+') else 0,
1729 'webpage_url': webpage_url,
1730 'categories': [category] if category else None,
1731 'tags': keywords,
1732 'is_live': is_live,
1733 'playable_in_embed': playability_status.get('playableInEmbed'),
1734 }
b477fc13 1735
545cc85d 1736 pctr = try_get(
1737 player_response,
1738 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
1739 subtitles = {}
1740 if pctr:
def process_language(container, base_url, lang_code, query):
    """Store one subtitle entry per supported format under `lang_code`.

    For every format in self._SUBTITLE_FORMATS the 'fmt' key of `query` is
    set (in place) to that format and an {'ext', 'url'} dict is built from
    `base_url` with the resulting query. The list of entries is assigned to
    container[lang_code].
    """
    entries = []
    for sub_format in self._SUBTITLE_FORMATS:
        # `query` is intentionally updated in place, one format at a time
        query['fmt'] = sub_format
        entry = {
            'ext': sub_format,
            'url': update_url_query(base_url, query),
        }
        entries.append(entry)
    container[lang_code] = entries
7e72694b 1752
545cc85d 1753 for caption_track in (pctr.get('captionTracks') or []):
1754 base_url = caption_track.get('baseUrl')
1755 if not base_url:
1756 continue
1757 if caption_track.get('kind') != 'asr':
1758 lang_code = caption_track.get('languageCode')
1759 if not lang_code:
1760 continue
1761 process_language(
1762 subtitles, base_url, lang_code, {})
1763 continue
1764 automatic_captions = {}
1765 for translation_language in (pctr.get('translationLanguages') or []):
1766 translation_language_code = translation_language.get('languageCode')
1767 if not translation_language_code:
1768 continue
1769 process_language(
1770 automatic_captions, base_url, translation_language_code,
1771 {'tlang': translation_language_code})
1772 info['automatic_captions'] = automatic_captions
1773 info['subtitles'] = subtitles
7e72694b 1774
545cc85d 1775 parsed_url = compat_urllib_parse_urlparse(url)
1776 for component in [parsed_url.fragment, parsed_url.query]:
1777 query = compat_parse_qs(component)
1778 for k, v in query.items():
1779 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
1780 d_k += '_time'
1781 if d_k not in info and k in s_ks:
1782 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
1783
1784 # Youtube Music Auto-generated description
822b9d9c 1785 if video_description:
38d70284 1786 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 1787 if mobj:
822b9d9c
RA
1788 release_year = mobj.group('release_year')
1789 release_date = mobj.group('release_date')
1790 if release_date:
1791 release_date = release_date.replace('-', '')
1792 if not release_year:
545cc85d 1793 release_year = release_date[:4]
1794 info.update({
1795 'album': mobj.group('album'.strip()),
1796 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
1797 'track': mobj.group('track').strip(),
1798 'release_date': release_date,
cc2db878 1799 'release_year': int_or_none(release_year),
545cc85d 1800 })
7e72694b 1801
545cc85d 1802 initial_data = None
1803 if webpage:
1804 initial_data = self._extract_yt_initial_variable(
1805 webpage, self._YT_INITIAL_DATA_RE, video_id,
1806 'yt initial data')
1807 if not initial_data:
1808 initial_data = self._call_api(
1809 'next', {'videoId': video_id}, video_id, fatal=False)
1810
1811 if not is_live:
1812 try:
1813 # This will error if there is no livechat
1814 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
1815 info['subtitles']['live_chat'] = [{
1816 'video_id': video_id,
1817 'ext': 'json',
1818 'protocol': 'youtube_live_chat_replay',
1819 }]
1820 except (KeyError, IndexError, TypeError):
1821 pass
1822
1823 if initial_data:
1824 chapters = self._extract_chapters_from_json(
1825 initial_data, video_id, duration)
1826 if not chapters:
1827 for engagment_pannel in (initial_data.get('engagementPanels') or []):
1828 contents = try_get(
1829 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
1830 list)
1831 if not contents:
1832 continue
1833
def chapter_time(mmlir):
    """Return the parsed 'timeDescription' of a chapter marker renderer.

    `mmlir` is the 'macroMarkersListItemRenderer' dict of one marker. It may
    also be None: the renderer of the *following* marker is fetched with
    try_get(), whose result is not guaranteed to be a dict. In that case the
    lookup is performed on an empty dict instead of raising AttributeError,
    so the caller's `is None` check can skip the chapter.

    The return value is whatever parse_duration() yields for the extracted
    text.
    """
    time_description = (mmlir or {}).get('timeDescription')
    return parse_duration(get_text(time_description))
1837
1838 chapters = []
1839 for next_num, content in enumerate(contents, start=1):
1840 mmlir = content.get('macroMarkersListItemRenderer') or {}
1841 start_time = chapter_time(mmlir)
1842 end_time = chapter_time(try_get(
1843 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
1844 if next_num < len(contents) else duration
1845 if start_time is None or end_time is None:
1846 continue
1847 chapters.append({
1848 'start_time': start_time,
1849 'end_time': end_time,
1850 'title': get_text(mmlir.get('title')),
1851 })
1852 if chapters:
1853 break
1854 if chapters:
1855 info['chapters'] = chapters
1856
1857 contents = try_get(
1858 initial_data,
1859 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
1860 list) or []
1861 for content in contents:
1862 vpir = content.get('videoPrimaryInfoRenderer')
1863 if vpir:
1864 stl = vpir.get('superTitleLink')
1865 if stl:
1866 stl = get_text(stl)
1867 if try_get(
1868 vpir,
1869 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
1870 info['location'] = stl
1871 else:
1872 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
1873 if mobj:
1874 info.update({
1875 'series': mobj.group(1),
1876 'season_number': int(mobj.group(2)),
1877 'episode_number': int(mobj.group(3)),
1878 })
1879 for tlb in (try_get(
1880 vpir,
1881 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
1882 list) or []):
1883 tbr = tlb.get('toggleButtonRenderer') or {}
1884 for getter, regex in [(
1885 lambda x: x['defaultText']['accessibility']['accessibilityData'],
1886 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
1887 lambda x: x['accessibility'],
1888 lambda x: x['accessibilityData']['accessibilityData'],
1889 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
1890 label = (try_get(tbr, getter, dict) or {}).get('label')
1891 if label:
1892 mobj = re.match(regex, label)
1893 if mobj:
1894 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
1895 break
1896 sbr_tooltip = try_get(
1897 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
1898 if sbr_tooltip:
1899 like_count, dislike_count = sbr_tooltip.split(' / ')
1900 info.update({
1901 'like_count': str_to_int(like_count),
1902 'dislike_count': str_to_int(dislike_count),
1903 })
1904 vsir = content.get('videoSecondaryInfoRenderer')
1905 if vsir:
1906 info['channel'] = get_text(try_get(
1907 vsir,
1908 lambda x: x['owner']['videoOwnerRenderer']['title'],
1909 compat_str))
1910 rows = try_get(
1911 vsir,
1912 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
1913 list) or []
1914 multiple_songs = False
1915 for row in rows:
1916 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
1917 multiple_songs = True
1918 break
1919 for row in rows:
1920 mrr = row.get('metadataRowRenderer') or {}
1921 mrr_title = mrr.get('title')
1922 if not mrr_title:
1923 continue
1924 mrr_title = get_text(mrr['title'])
1925 mrr_contents_text = get_text(mrr['contents'][0])
1926 if mrr_title == 'License':
1927 info['license'] = mrr_contents_text
1928 elif not multiple_songs:
1929 if mrr_title == 'Album':
1930 info['album'] = mrr_contents_text
1931 elif mrr_title == 'Artist':
1932 info['artist'] = mrr_contents_text
1933 elif mrr_title == 'Song':
1934 info['track'] = mrr_contents_text
1935
1936 fallbacks = {
1937 'channel': 'uploader',
1938 'channel_id': 'uploader_id',
1939 'channel_url': 'uploader_url',
1940 }
1941 for to, frm in fallbacks.items():
1942 if not info.get(to):
1943 info[to] = info.get(frm)
1944
1945 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
1946 v = info.get(s_k)
1947 if v:
1948 info[d_k] = v
b84071c0 1949
06167fbb 1950 # get xsrf for annotations or comments
1951 get_annotations = self._downloader.params.get('writeannotations', False)
1952 get_comments = self._downloader.params.get('getcomments', False)
1953 if get_annotations or get_comments:
29f7c58a 1954 xsrf_token = None
545cc85d 1955 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 1956 if ytcfg:
1957 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
1958 if not xsrf_token:
1959 xsrf_token = self._search_regex(
1960 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 1961 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 1962
1963 # annotations
06167fbb 1964 if get_annotations:
64b6a4e9
RA
1965 invideo_url = try_get(
1966 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
1967 if xsrf_token and invideo_url:
29f7c58a 1968 xsrf_field_name = None
1969 if ytcfg:
1970 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
1971 if not xsrf_field_name:
1972 xsrf_field_name = self._search_regex(
1973 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 1974 webpage, 'xsrf field name',
29f7c58a 1975 group='xsrf_field_name', default='session_token')
8a784c74 1976 info['annotations'] = self._download_webpage(
64b6a4e9
RA
1977 self._proto_relative_url(invideo_url),
1978 video_id, note='Downloading annotations',
1979 errnote='Unable to download video annotations', fatal=False,
1980 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 1981
06167fbb 1982 # Get comments
1983 # TODO: Refactor and move to separate function
1984 if get_comments:
1985 expected_video_comment_count = 0
1986 video_comments = []
1987
def find_value(html, key, num_chars=2, separator='"'):
    """Return the text that follows `key` in `html`, up to `separator`.

    The value is taken to start `num_chars` characters after the end of the
    first occurrence of `key` (the default of 2 skips e.g. `="`) and to end
    right before the next occurrence of `separator`.

    Returns None when `key` does not occur in `html`. When no `separator`
    follows the value, everything up to the end of `html` is returned.
    """
    key_pos = html.find(key)
    if key_pos == -1:
        # str.find() reports "not found" as -1; feeding that into the offset
        # arithmetic would silently slice an unrelated part of `html`
        return None
    pos_begin = key_pos + len(key) + num_chars
    pos_end = html.find(separator, pos_begin)
    if pos_end == -1:
        # Do not let the -1 marker act as a slice index, which would drop
        # the last character of an unterminated value
        return html[pos_begin:]
    return html[pos_begin:pos_end]
1992
def search_dict(partial, key):
    """Lazily yield every value stored under `key` inside `partial`.

    `partial` is walked depth-first through nested dicts and lists. A value
    found under `key` is yielded as is and not searched any further; values
    under other keys and list items are descended into. Anything that is
    neither a dict nor a list yields nothing.
    """
    if isinstance(partial, list):
        for element in partial:
            for match in search_dict(element, key):
                yield match
        return
    if not isinstance(partial, dict):
        return
    for name, value in partial.items():
        if name != key:
            for match in search_dict(value, key):
                yield match
            continue
        yield value
2005
8a784c74 2006 continuations = []
2007 if initial_data:
2008 try:
2009 ncd = next(search_dict(initial_data, 'nextContinuationData'))
2010 continuations = [ncd['continuation']]
2011 # Handle videos where comments have been disabled entirely
2012 except StopIteration:
2013 pass
06167fbb 2014
def get_continuation(continuation, session_token, replies=False):
    """Fetch one page of comments (or comment replies) for `continuation`.

    Sends `session_token` as url-encoded request data to the
    comment_service_ajax endpoint, with `continuation` passed as the
    'ctoken' query parameter and the action selected by `replies`.

    Returns the parsed JSON body on HTTP 200 and None on HTTP 413 (which is
    listed in expected_status); any other status code raises ExtractorError.
    """
    action = 'action_get_comment_replies' if replies else 'action_get_comments'
    request_query = {
        'pbj': 1,
        'ctoken': continuation,
        action: 1,
    }
    post_data = urlencode_postdata({
        'session_token': session_token
    })
    request_headers = {
        'Accept': '*/*',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0',
        'X-YouTube-Client-Name': '1',
        'X-YouTube-Client-Version': '2.20201202.06.01'
    }

    content, handle = self._download_webpage_handle(
        'https://www.youtube.com/comment_service_ajax',
        video_id,
        note=False,
        expected_status=[413],
        data=post_data,
        query=request_query,
        headers=request_headers)

    status = handle.getcode()
    if status == 413:
        return None
    if status != 200:
        raise ExtractorError('Unexpected HTTP error code: %s' % status)
    return self._parse_json(content, video_id)
2049
2050 first_continuation = True
885d36d4 2051 chain_msg = ''
2052 self.to_screen('Downloading comments')
06167fbb 2053 while continuations:
885d36d4 2054 continuation = continuations.pop()
8d0ea5f9 2055 comment_response = get_continuation(continuation, xsrf_token)
06167fbb 2056 if not comment_response:
2057 continue
2058 if list(search_dict(comment_response, 'externalErrorMessage')):
2059 raise ExtractorError('Error returned from server: ' + next(search_dict(comment_response, 'externalErrorMessage')))
2060
8d0ea5f9
B
2061 if 'continuationContents' not in comment_response['response']:
2062 # Something is wrong here. Youtube won't accept this continuation token for some reason and responds with a user satisfaction dialog (error?)
2063 continue
2064 # not sure if this actually helps
2065 if 'xsrf_token' in comment_response:
2066 xsrf_token = comment_response['xsrf_token']
2067
06167fbb 2068 item_section = comment_response['response']['continuationContents']['itemSectionContinuation']
2069 if first_continuation:
2070 expected_video_comment_count = int(item_section['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'].replace(' Comments', '').replace('1 Comment', '1').replace(',', ''))
2071 first_continuation = False
2072 if 'contents' not in item_section:
2073 # continuation returned no comments?
2074 # set an empty array as to not break the for loop
2075 item_section['contents'] = []
2076
2077 for meta_comment in item_section['contents']:
2078 comment = meta_comment['commentThreadRenderer']['comment']['commentRenderer']
2079 video_comments.append({
2080 'id': comment['commentId'],
ba7bf12d 2081 'text': ''.join([c['text'] for c in try_get(comment, lambda x: x['contentText']['runs'], list) or []]),
8d0ea5f9 2082 'time_text': ''.join([c['text'] for c in comment['publishedTimeText']['runs']]),
06167fbb 2083 'author': comment.get('authorText', {}).get('simpleText', ''),
2084 'votes': comment.get('voteCount', {}).get('simpleText', '0'),
2085 'author_thumbnail': comment['authorThumbnail']['thumbnails'][-1]['url'],
2086 'parent': 'root'
2087 })
2088 if 'replies' not in meta_comment['commentThreadRenderer']:
2089 continue
2090
8d0ea5f9
B
2091 reply_continuations = [rcn['nextContinuationData']['continuation'] for rcn in meta_comment['commentThreadRenderer']['replies']['commentRepliesRenderer']['continuations']]
2092 while reply_continuations:
06167fbb 2093 time.sleep(1)
8d0ea5f9
B
2094 continuation = reply_continuations.pop()
2095 replies_data = get_continuation(continuation, xsrf_token, True)
06167fbb 2096 if not replies_data or 'continuationContents' not in replies_data[1]['response']:
8d0ea5f9 2097 continue
06167fbb 2098
2099 if self._downloader.params.get('verbose', False):
885d36d4 2100 chain_msg = ' (chain %s)' % comment['commentId']
2101 self.to_screen('Comments downloaded: %d of ~%d%s' % (len(video_comments), expected_video_comment_count, chain_msg))
06167fbb 2102 reply_comment_meta = replies_data[1]['response']['continuationContents']['commentRepliesContinuation']
885d36d4 2103 for reply_meta in reply_comment_meta.get('contents', {}):
06167fbb 2104 reply_comment = reply_meta['commentRenderer']
2105 video_comments.append({
2106 'id': reply_comment['commentId'],
2107 'text': ''.join([c['text'] for c in reply_comment['contentText']['runs']]),
8d0ea5f9 2108 'time_text': ''.join([c['text'] for c in reply_comment['publishedTimeText']['runs']]),
06167fbb 2109 'author': reply_comment.get('authorText', {}).get('simpleText', ''),
2110 'votes': reply_comment.get('voteCount', {}).get('simpleText', '0'),
2111 'author_thumbnail': reply_comment['authorThumbnail']['thumbnails'][-1]['url'],
2112 'parent': comment['commentId']
2113 })
2114 if 'continuations' not in reply_comment_meta or len(reply_comment_meta['continuations']) == 0:
8d0ea5f9 2115 continue
8d0ea5f9 2116 reply_continuations += [rcn['nextContinuationData']['continuation'] for rcn in reply_comment_meta['continuations']]
06167fbb 2117
885d36d4 2118 self.to_screen('Comments downloaded: %d of ~%d' % (len(video_comments), expected_video_comment_count))
06167fbb 2119 if 'continuations' in item_section:
8d0ea5f9 2120 continuations += [ncd['nextContinuationData']['continuation'] for ncd in item_section['continuations']]
06167fbb 2121 time.sleep(1)
2122
885d36d4 2123 self.to_screen('Total comments downloaded: %d of ~%d' % (len(video_comments), expected_video_comment_count))
545cc85d 2124 info.update({
2125 'comments': video_comments,
2126 'comment_count': expected_video_comment_count
2127 })
4ea3be0a 2128
545cc85d 2129 self.mark_watched(video_id, player_response)
d77ab8e2 2130
545cc85d 2131 return info
c5e8d7af 2132
5f6a1245 2133
8bdd16b4 2134class YoutubeTabIE(YoutubeBaseInfoExtractor):
2135 IE_DESC = 'YouTube.com tab'
70d5c17b 2136 _VALID_URL = r'''(?x)
2137 https?://
2138 (?:\w+\.)?
2139 (?:
2140 youtube(?:kids)?\.com|
2141 invidio\.us
2142 )/
2143 (?:
2144 (?:channel|c|user)/|
2145 (?P<not_channel>
9ba5705a 2146 feed/|hashtag/|
70d5c17b 2147 (?:playlist|watch)\?.*?\blist=
2148 )|
29f7c58a 2149 (?!(?:%s)\b) # Direct URLs
70d5c17b 2150 )
2151 (?P<id>[^/?\#&]+)
2152 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2153 IE_NAME = 'youtube:tab'
2154
81127aa5 2155 _TESTS = [{
8bdd16b4 2156 # playlists, multipage
2157 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2158 'playlist_mincount': 94,
2159 'info_dict': {
2160 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2161 'title': 'Игорь Клейнер - Playlists',
2162 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2163 'uploader': 'Игорь Клейнер',
2164 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2165 },
2166 }, {
2167 # playlists, multipage, different order
2168 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2169 'playlist_mincount': 94,
2170 'info_dict': {
2171 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2172 'title': 'Игорь Клейнер - Playlists',
2173 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2174 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2175 'uploader': 'Игорь Клейнер',
8bdd16b4 2176 },
2177 }, {
2178 # playlists, singlepage
2179 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2180 'playlist_mincount': 4,
2181 'info_dict': {
2182 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2183 'title': 'ThirstForScience - Playlists',
2184 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2185 'uploader': 'ThirstForScience',
2186 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2187 }
2188 }, {
2189 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2190 'only_matching': True,
2191 }, {
2192 # basic, single video playlist
0e30a7b9 2193 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2194 'info_dict': {
0e30a7b9 2195 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2196 'uploader': 'Sergey M.',
2197 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2198 'title': 'youtube-dl public playlist',
81127aa5 2199 },
0e30a7b9 2200 'playlist_count': 1,
9291475f 2201 }, {
8bdd16b4 2202 # empty playlist
0e30a7b9 2203 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2204 'info_dict': {
0e30a7b9 2205 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2206 'uploader': 'Sergey M.',
2207 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2208 'title': 'youtube-dl empty playlist',
9291475f
PH
2209 },
2210 'playlist_count': 0,
2211 }, {
8bdd16b4 2212 # Home tab
2213 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2214 'info_dict': {
8bdd16b4 2215 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2216 'title': 'lex will - Home',
2217 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2218 'uploader': 'lex will',
2219 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2220 },
8bdd16b4 2221 'playlist_mincount': 2,
9291475f 2222 }, {
8bdd16b4 2223 # Videos tab
2224 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2225 'info_dict': {
8bdd16b4 2226 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2227 'title': 'lex will - Videos',
2228 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2229 'uploader': 'lex will',
2230 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2231 },
8bdd16b4 2232 'playlist_mincount': 975,
9291475f 2233 }, {
8bdd16b4 2234 # Videos tab, sorted by popular
2235 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2236 'info_dict': {
8bdd16b4 2237 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2238 'title': 'lex will - Videos',
2239 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2240 'uploader': 'lex will',
2241 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2242 },
8bdd16b4 2243 'playlist_mincount': 199,
9291475f 2244 }, {
8bdd16b4 2245 # Playlists tab
2246 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2247 'info_dict': {
8bdd16b4 2248 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2249 'title': 'lex will - Playlists',
2250 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2251 'uploader': 'lex will',
2252 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2253 },
8bdd16b4 2254 'playlist_mincount': 17,
ac7553d0 2255 }, {
8bdd16b4 2256 # Community tab
2257 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2258 'info_dict': {
8bdd16b4 2259 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2260 'title': 'lex will - Community',
2261 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2262 'uploader': 'lex will',
2263 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2264 },
2265 'playlist_mincount': 18,
87dadd45 2266 }, {
8bdd16b4 2267 # Channels tab
2268 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2269 'info_dict': {
8bdd16b4 2270 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2271 'title': 'lex will - Channels',
2272 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2273 'uploader': 'lex will',
2274 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2275 },
deaec5af 2276 'playlist_mincount': 12,
6b08cdf6 2277 }, {
a0566bbf 2278 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2279 'only_matching': True,
2280 }, {
a0566bbf 2281 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2282 'only_matching': True,
2283 }, {
a0566bbf 2284 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2285 'only_matching': True,
2286 }, {
2287 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2288 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2289 'info_dict': {
2290 'title': '29C3: Not my department',
2291 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2292 'uploader': 'Christiaan008',
2293 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2294 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2295 },
2296 'playlist_count': 96,
2297 }, {
2298 'note': 'Large playlist',
2299 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2300 'info_dict': {
8bdd16b4 2301 'title': 'Uploads from Cauchemar',
2302 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2303 'uploader': 'Cauchemar',
2304 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2305 },
8bdd16b4 2306 'playlist_mincount': 1123,
2307 }, {
2308 # even larger playlist, 8832 videos
2309 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2310 'only_matching': True,
4b7df0d3
JMF
2311 }, {
2312 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2313 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2314 'info_dict': {
acf757f4
PH
2315 'title': 'Uploads from Interstellar Movie',
2316 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2317 'uploader': 'Interstellar Movie',
8bdd16b4 2318 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2319 },
481cc733 2320 'playlist_mincount': 21,
8bdd16b4 2321 }, {
2322 # https://github.com/ytdl-org/youtube-dl/issues/21844
2323 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2324 'info_dict': {
2325 'title': 'Data Analysis with Dr Mike Pound',
2326 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2327 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2328 'uploader': 'Computerphile',
deaec5af 2329 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2330 },
2331 'playlist_mincount': 11,
2332 }, {
a0566bbf 2333 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2334 'only_matching': True,
dacb3a86
S
2335 }, {
2336 # Playlist URL that does not actually serve a playlist
2337 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2338 'info_dict': {
2339 'id': 'FqZTN594JQw',
2340 'ext': 'webm',
2341 'title': "Smiley's People 01 detective, Adventure Series, Action",
2342 'uploader': 'STREEM',
2343 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2344 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2345 'upload_date': '20150526',
2346 'license': 'Standard YouTube License',
2347 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2348 'categories': ['People & Blogs'],
2349 'tags': list,
dbdaaa23 2350 'view_count': int,
dacb3a86
S
2351 'like_count': int,
2352 'dislike_count': int,
2353 },
2354 'params': {
2355 'skip_download': True,
2356 },
13a75688 2357 'skip': 'This video is not available.',
dacb3a86 2358 'add_ie': [YoutubeIE.ie_key()],
481cc733 2359 }, {
8bdd16b4 2360 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2361 'only_matching': True,
66b48727 2362 }, {
8bdd16b4 2363 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2364 'only_matching': True,
a0566bbf 2365 }, {
2366 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2367 'info_dict': {
2368 'id': '9Auq9mYxFEE',
2369 'ext': 'mp4',
deaec5af 2370 'title': compat_str,
a0566bbf 2371 'uploader': 'Sky News',
2372 'uploader_id': 'skynews',
2373 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2374 'upload_date': '20191102',
deaec5af 2375 'description': 'md5:85ddd75d888674631aaf9599a9a0b0ae',
a0566bbf 2376 'categories': ['News & Politics'],
2377 'tags': list,
2378 'like_count': int,
2379 'dislike_count': int,
2380 },
2381 'params': {
2382 'skip_download': True,
2383 },
2384 }, {
2385 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2386 'info_dict': {
2387 'id': 'a48o2S1cPoo',
2388 'ext': 'mp4',
2389 'title': 'The Young Turks - Live Main Show',
2390 'uploader': 'The Young Turks',
2391 'uploader_id': 'TheYoungTurks',
2392 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2393 'upload_date': '20150715',
2394 'license': 'Standard YouTube License',
2395 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2396 'categories': ['News & Politics'],
2397 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2398 'like_count': int,
2399 'dislike_count': int,
2400 },
2401 'params': {
2402 'skip_download': True,
2403 },
2404 'only_matching': True,
2405 }, {
2406 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2407 'only_matching': True,
2408 }, {
2409 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2410 'only_matching': True,
3d3dddc9 2411 }, {
2412 'url': 'https://www.youtube.com/feed/trending',
2413 'only_matching': True,
2414 }, {
2415 # needs auth
2416 'url': 'https://www.youtube.com/feed/library',
2417 'only_matching': True,
2418 }, {
2419 # needs auth
2420 'url': 'https://www.youtube.com/feed/history',
2421 'only_matching': True,
2422 }, {
2423 # needs auth
2424 'url': 'https://www.youtube.com/feed/subscriptions',
2425 'only_matching': True,
2426 }, {
2427 # needs auth
2428 'url': 'https://www.youtube.com/feed/watch_later',
2429 'only_matching': True,
2430 }, {
2431 # no longer available?
2432 'url': 'https://www.youtube.com/feed/recommended',
2433 'only_matching': True,
29f7c58a 2434 }, {
2435 # inline playlist with not always working continuations
2436 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2437 'only_matching': True,
2438 }, {
2439 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2440 'only_matching': True,
2441 }, {
2442 'url': 'https://www.youtube.com/course',
2443 'only_matching': True,
2444 }, {
2445 'url': 'https://www.youtube.com/zsecurity',
2446 'only_matching': True,
2447 }, {
2448 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2449 'only_matching': True,
2450 }, {
2451 'url': 'https://www.youtube.com/TheYoungTurks/live',
2452 'only_matching': True,
2453 }]
2454
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    URLs that YoutubeIE reports as suitable are always declined here;
    everything else is decided by the parent class's ``suitable``.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2459
2460 def _extract_channel_id(self, webpage):
2461 channel_id = self._html_search_meta(
2462 'channelId', webpage, 'channel id', default=None)
2463 if channel_id:
2464 return channel_id
2465 channel_url = self._html_search_meta(
2466 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2467 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2468 'twitter:app:url:googleplay'), webpage, 'channel url')
2469 return self._search_regex(
2470 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2471 channel_url, 'channel id')
15f6397c 2472
8bdd16b4 2473 @staticmethod
2474 def _extract_grid_item_renderer(item):
2475 for item_kind in ('Playlist', 'Video', 'Channel'):
2476 renderer = item.get('grid%sRenderer' % item_kind)
2477 if renderer:
2478 return renderer
2479
8bdd16b4 2480 def _grid_entries(self, grid_renderer):
2481 for item in grid_renderer['items']:
2482 if not isinstance(item, dict):
39b62db1 2483 continue
8bdd16b4 2484 renderer = self._extract_grid_item_renderer(item)
2485 if not isinstance(renderer, dict):
2486 continue
2487 title = try_get(
2488 renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
2489 # playlist
2490 playlist_id = renderer.get('playlistId')
2491 if playlist_id:
2492 yield self.url_result(
2493 'https://www.youtube.com/playlist?list=%s' % playlist_id,
2494 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
2495 video_title=title)
2496 # video
2497 video_id = renderer.get('videoId')
2498 if video_id:
2499 yield self._extract_video(renderer)
2500 # channel
2501 channel_id = renderer.get('channelId')
2502 if channel_id:
2503 title = try_get(
2504 renderer, lambda x: x['title']['simpleText'], compat_str)
2505 yield self.url_result(
2506 'https://www.youtube.com/channel/%s' % channel_id,
2507 ie=YoutubeTabIE.ie_key(), video_title=title)
2508
3d3dddc9 2509 def _shelf_entries_from_content(self, shelf_renderer):
2510 content = shelf_renderer.get('content')
2511 if not isinstance(content, dict):
8bdd16b4 2512 return
3d3dddc9 2513 renderer = content.get('gridRenderer')
2514 if renderer:
2515 # TODO: add support for nested playlists so each shelf is processed
2516 # as separate playlist
2517 # TODO: this includes only first N items
2518 for entry in self._grid_entries(renderer):
2519 yield entry
2520 renderer = content.get('horizontalListRenderer')
2521 if renderer:
2522 # TODO
2523 pass
8bdd16b4 2524
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf renderer.

    When the shelf carries an endpoint URL, a link to that URL is yielded
    first (titled with the shelf's first title run).  If *skip_channels* is
    true and the URL contains ``/channels?``, nothing at all is yielded.
    Entries found in the shelf content are yielded afterwards.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Links to other channels are recognised by the URL shape; note that
        # checking endpoint.commandMetadata.webCommandMetadata.webPageType
        # == WEB_PAGE_TYPE_CHANNEL will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for content_entry in self._shelf_entries_from_content(shelf_renderer):
        yield content_entry
c5e8d7af 2542
8bdd16b4 2543 def _playlist_entries(self, video_list_renderer):
2544 for content in video_list_renderer['contents']:
2545 if not isinstance(content, dict):
2546 continue
2547 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2548 if not isinstance(renderer, dict):
2549 continue
2550 video_id = renderer.get('videoId')
2551 if not video_id:
2552 continue
2553 yield self._extract_video(renderer)
07aeced6 2554
3d3dddc9 2555 r""" # Not needed in the new implementation
3462ffa8 2556 def _itemSection_entries(self, item_sect_renderer):
2557 for content in item_sect_renderer['contents']:
2558 if not isinstance(content, dict):
2559 continue
2560 renderer = content.get('videoRenderer', {})
2561 if not isinstance(renderer, dict):
2562 continue
2563 video_id = renderer.get('videoId')
2564 if not video_id:
2565 continue
2566 yield self._extract_video(renderer)
3d3dddc9 2567 """
3462ffa8 2568
def _rich_entries(self, rich_grid_renderer):
    """Yield the video entry held in ``content.videoRenderer``, if any.

    Nothing is yielded when the renderer is missing, is not a dict, or has
    no ``videoId``.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
    if not video_renderer:
        video_renderer = {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
2576
8bdd16b4 2577 def _video_entry(self, video_renderer):
2578 video_id = video_renderer.get('videoId')
2579 if video_id:
2580 return self._extract_video(video_renderer)
dacb3a86 2581
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos referenced by a community post thread.

    The video attached to the post (``backstageAttachment.videoRenderer``)
    is yielded first, followed by a link for every text run whose URL
    endpoint is a URL YoutubeIE can handle.  An inline link that points at
    the attached video is not emitted a second time.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return
    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    # Remember the attached video's id so that the inline-link pass below
    # can actually recognise duplicates of it.
    video_id = video_renderer.get('videoId')
    if video_id:
        entry = self._video_entry(video_renderer)
        if entry:
            yield entry
    # inline video links
    runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
    for run in runs:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url:
            continue
        if not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if video_id == ep_video_id:
            continue
        # Tag the result with the id of the linked video itself.
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 2610
8bdd16b4 2611 def _post_thread_continuation_entries(self, post_thread_continuation):
2612 contents = post_thread_continuation.get('contents')
2613 if not isinstance(contents, list):
2614 return
2615 for content in contents:
2616 renderer = content.get('backstagePostThreadRenderer')
2617 if not isinstance(renderer, dict):
2618 continue
2619 for entry in self._post_thread_entries(renderer):
2620 yield entry
07aeced6 2621
29f7c58a 2622 @staticmethod
2623 def _build_continuation_query(continuation, ctp=None):
2624 query = {
2625 'ctoken': continuation,
2626 'continuation': continuation,
2627 }
2628 if ctp:
2629 query['itct'] = ctp
2630 return query
2631
@staticmethod
def _extract_next_continuation_data(renderer):
    """Build a continuation query from ``continuations[0].nextContinuationData``.

    Returns None when that structure, or its ``continuation`` token, is
    missing or empty.
    """
    next_data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
    token = next_data.get('continuation') if next_data else None
    if token:
        return YoutubeTabIE._build_continuation_query(
            token, next_data.get('clickTrackingParams'))
    return None
c5e8d7af 2643
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or None.

    ``nextContinuationData`` is tried first.  Otherwise the renderer's
    ``contents`` and ``items`` lists are scanned, in that order, for the
    first ``continuationItemRenderer`` whose endpoint carries a
    ``continuationCommand`` token.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct
    candidates = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
    return None
448830ce 2666
def _entries(self, tab, identity_token):
    """Yield all entries of *tab*, following continuations page by page.

    The entries embedded in the tab's ``sectionListRenderer`` (or
    ``richGridRenderer``) are yielded first.  While a continuation query is
    available, further pages are fetched from ``browse_ajax`` and their
    entries yielded.  *identity_token*, when truthy, is sent as the
    ``x-youtube-identity-token`` header.
    """

    def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
        # Yields the entries below parent_renderer and, as a side effect,
        # stores the continuation it finds in continuation_list[0] (a
        # closure variable of the enclosing function).
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                # No item section: the only other shape handled is a rich item
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                # Maps a renderer key to the callable producing its entries
                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    # Only the first known renderer of each content dict is used
                    break

            # Fall back to a continuation on the item section itself
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        # Last resort: a continuation on the parent renderer
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list used as a mutable cell that extract_entries writes to
    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]

    headers = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '2.20201112.04.01',
    }
    if identity_token:
        headers['x-youtube-identity-token'] = identity_token

    for page_num in itertools.count(1):
        if not continuation:
            break
        count = 0
        retries = 3
        # One initial attempt plus up to `retries` retries on HTTP 500/503;
        # any other error, or exhausting the retries, re-raises.
        while count <= retries:
            try:
                # Downloading page may result in intermittent 5xx HTTP error
                # that is usually worked around with a retry
                browse = self._download_json(
                    'https://www.youtube.com/browse_ajax', None,
                    'Downloading page %d%s'
                    % (page_num, ' (retry #%d)' % count if count else ''),
                    headers=headers, query=continuation)
                break
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
                    count += 1
                    if count <= retries:
                        continue
                raise
        if not browse:
            break
        response = try_get(browse, lambda x: x[1]['response'], dict)
        if not response:
            break

        # First response shape handled: 'continuationContents'
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Rebinding resets the cell extract_entries writes to, so a
            # continuation left over from the previous page is not reused
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response shape handled: 'appendContinuationItemsAction'.
        # Each value is (entry callable, key under which that callable
        # expects to find the item list).
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (self._playlist_entries, 'contents'),
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        }
        continuation_items = try_get(
            response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
        # The first item's keys decide which handler processes the whole list
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            # Wrap the raw item list so it looks like the renderer the handler expects
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Neither response shape was recognised: stop paging
        break
9558dcec 2798
@staticmethod
def _extract_selected_tab(tabs):
    """Return the ``tabRenderer`` of the first tab flagged as selected.

    Raises ExtractorError when no tab in *tabs* is selected.
    """
    for candidate in tabs:
        is_selected = try_get(
            candidate, lambda x: x['tabRenderer']['selected'], bool)
        if is_selected:
            return candidate['tabRenderer']
    raise ExtractorError('Unable to find selected tab')
b82f815f 2806
@staticmethod
def _extract_uploader(data):
    """Collect uploader name, id and URL from the playlist sidebar.

    Reads the video owner found in each ``playlistSidebarSecondaryInfoRenderer``
    sidebar item.  The returned dict only contains the keys (``uploader``,
    ``uploader_id``, ``uploader_url``) whose value is not None.
    """
    info = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        secondary = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary, dict):
            continue
        owner = try_get(
            secondary, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        info['uploader'] = owner.get('text')
        info['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        base_path = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
        info['uploader_url'] = urljoin('https://www.youtube.com/', base_path)
    return dict((key, value) for key, value in info.items() if value is not None)
8bdd16b4 2829
2830 def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
b60419c5 2831 playlist_id = title = description = channel_url = channel_name = channel_id = None
2832 thumbnails_list = tags = []
2833
8bdd16b4 2834 selected_tab = self._extract_selected_tab(tabs)
2835 renderer = try_get(
2836 data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
2837 if renderer:
b60419c5 2838 channel_name = renderer.get('title')
2839 channel_url = renderer.get('channelUrl')
2840 channel_id = renderer.get('externalId')
64c0d954 2841
64c0d954 2842 if not renderer:
2843 renderer = try_get(
2844 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
8bdd16b4 2845 if renderer:
2846 title = renderer.get('title')
ecc97af3 2847 description = renderer.get('description', '')
b60419c5 2848 playlist_id = channel_id
2849 tags = renderer.get('keywords', '').split()
2850 thumbnails_list = (
2851 try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
ff84930c 2852 or try_get(
2853 data,
2854 lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
2855 list)
b60419c5 2856 or [])
2857
2858 thumbnails = []
2859 for t in thumbnails_list:
2860 if not isinstance(t, dict):
2861 continue
2862 thumbnail_url = url_or_none(t.get('url'))
2863 if not thumbnail_url:
2864 continue
2865 thumbnails.append({
2866 'url': thumbnail_url,
2867 'width': int_or_none(t.get('width')),
2868 'height': int_or_none(t.get('height')),
2869 })
64c0d954 2870
3462ffa8 2871 if playlist_id is None:
70d5c17b 2872 playlist_id = item_id
2873 if title is None:
b60419c5 2874 title = playlist_id
2875 title += format_field(selected_tab, 'title', ' - %s')
2876
2877 metadata = {
2878 'playlist_id': playlist_id,
2879 'playlist_title': title,
2880 'playlist_description': description,
2881 'uploader': channel_name,
2882 'uploader_id': channel_id,
2883 'uploader_url': channel_url,
2884 'thumbnails': thumbnails,
2885 'tags': tags,
2886 }
2887 if not channel_id:
2888 metadata.update(self._extract_uploader(data))
2889 metadata.update({
2890 'channel': metadata['uploader'],
2891 'channel_id': metadata['uploader_id'],
2892 'channel_url': metadata['uploader_url']})
2893 return self.playlist_result(
29f7c58a 2894 self._entries(selected_tab, identity_token),
b60419c5 2895 **metadata)
73c4ac2c 2896
def _extract_from_playlist(self, item_id, url, data, playlist):
    """Return a result for a playlist rendered inline on a watch page.

    If the playlist exposes its own endpoint URL and that URL differs from
    *url*, extraction is delegated to that URL; otherwise the inline
    contents are turned into a playlist result directly.
    """
    playlist_title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    list_id = playlist.get('playlistId') or item_id
    # Inline playlist rendition continuation does not always work
    # at Youtube side, so delegating regular tab-based playlist URL
    # processing whenever possible.
    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._playlist_entries(playlist), playlist_id=list_id,
            playlist_title=playlist_title)
    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=list_id,
        video_title=playlist_title)
c5e8d7af 2914
@staticmethod
def _extract_alerts(data):
    """Yield ``(alert_type, message)`` pairs for the alerts found in *data*.

    Every value of each dict in ``data['alerts']`` is treated as an alert.
    Alerts without a ``type`` are skipped.  A ``text.simpleText`` message
    is yielded first, followed by one pair per ``text.runs`` item that has
    a text.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # A malformed (non-dict) alert is skipped rather than allowed
            # to raise AttributeError on .get()
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
            if message:
                yield alert_type, message
            for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                message = try_get(run, lambda x: x['text'], compat_str)
                if message:
                    yield alert_type, message
2932
def _extract_identity_token(self, webpage, item_id):
    """Return the identity token for *webpage*, or None if it cannot be found.

    The ``ID_TOKEN`` entry of the extracted ytcfg is preferred; otherwise
    the token is searched for in the raw page with a regular expression.
    """
    ytcfg = self._extract_ytcfg(item_id, webpage)
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
2942
def _real_extract(self, url):
    """Extract a tab, playlist or single video from *url*.

    The URL is first normalised (host forced to www.youtube.com, bare
    channel pages redirected to ``/videos``, malformed ``watch`` URLs with
    only a ``list`` rewritten to a playlist URL).  The page's initial data
    then decides the result: a tabbed page, an inline playlist, or, as a
    fallback, a single video.

    Raises ExtractorError when YouTube reports an error alert or when the
    page cannot be recognised.
    """
    item_id = self._match_id(url)
    # Force the canonical host, whatever host the URL was given with
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    # Matches when nothing but an optional '/', a query or a fragment
    # follows the part of the URL matched by _VALID_URL
    is_home = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
    if is_home is not None and is_home.group('not_channel') is None and item_id != 'feed':
        self._downloader.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        url = '%s/videos%s' % (is_home.group('pre'), is_home.group('post') or '')

    # Handle both video/playlist URLs
    qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    # A 'watch' URL without a video id is only usable if it names a playlist
    if is_home is not None and is_home.group('not_channel') is not None and is_home.group('not_channel').startswith('watch') and not video_id:
        if playlist_id:
            self._downloader.report_warning('%s is not a valid Youtube URL. Trying to download playlist %s' % (url, playlist_id))
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            # return self.url_result(playlist_id, ie=YoutubePlaylistIE.ie_key())
        else:
            raise ExtractorError('Unable to recognize tab page')
    if video_id and playlist_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage = self._download_webpage(url, item_id)
    identity_token = self._extract_identity_token(webpage, item_id)
    data = self._extract_yt_initial_data(item_id, webpage)
    # Non-error alerts are reported as warnings.  Of the error alerts, only
    # the last one is raised; earlier ones are downgraded to warnings.
    err_msg = None
    for alert_type, alert_message in self._extract_alerts(data):
        if alert_type.lower() == 'error':
            if err_msg:
                self._downloader.report_warning('YouTube said: %s - %s' % ('ERROR', err_msg))
            err_msg = alert_message
        else:
            self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
    if err_msg:
        raise ExtractorError('YouTube said: %s' % err_msg, expected=True)
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist)
    # Fallback to video extraction if no playlist alike page is recognized.
    # First check for the current video then try the v attribute of URL query.
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
    # Failed to recognize
    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 3002
c5e8d7af 3003
class YoutubePlaylistIE(InfoExtractor):
    """Accept bare playlist ids and ``list=`` URLs and hand them to YoutubeTabIE."""

    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline anything YoutubeTabIE already accepts; otherwise use the default check."""
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Redirect to the canonical ``/playlist`` URL handled by YoutubeTabIE.

        The query string of *url* is carried over as is; when there is none
        (e.g. a bare playlist id was given) ``list=<id>`` is used instead.
        """
        playlist_id = self._match_id(url)
        query = compat_urlparse.parse_qs(
            compat_urlparse.urlparse(url).query) or {'list': playlist_id}
        target = update_url_query('https://www.youtube.com/playlist', query)
        return self.url_result(
            target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3078
3079
class YoutubeYtBeIE(InfoExtractor):
    """Turn ``youtu.be/<video>?list=<playlist>`` links into watch URLs for YoutubeTabIE."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rebuild the link as a ``/watch`` URL carrying both ``v`` and ``list``."""
        video_id, playlist_id = re.match(
            self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3118
3119
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the ``ytuser:<name>`` shorthand to the user's page URL."""

    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the ``/user/<name>`` URL."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3133
b05654f0 3134
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ``:ytfav`` family of keywords to the ``LL`` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the ``list=LL`` playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
3152
3153
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    """Keyword search extractor that pages through the youtubei search API."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    _SEARCH_KEY = 'ytsearch'
    # Value sent as the request's "params" field when truthy.
    _SEARCH_PARAMS = None
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _TESTS = []

    def _entries(self, query, n):
        """Yield extracted videos for *query*, stopping after *n* results.

        Pages are requested one after another until a download fails, a
        page has no recognisable result list, no continuation token is
        found, or *n* videos have been yielded.
        """
        payload = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                },
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            payload['params'] = self._SEARCH_PARAMS

        yielded = 0
        for page_no in itertools.count(1):
            response = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_no,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(payload).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not response:
                return

            # The first getter matches the initial response layout, the
            # second the layout of continuation responses.
            sections = try_get(
                response,
                (lambda r: r['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda r: r['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches, which
            # shifts the position of both the videos and the token, so
            # every section is inspected instead of fixed indices.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda s: s['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda s: s['itemSectionRenderer']['contents'],
                    list)
                for item in items or ():
                    if not isinstance(item, dict):
                        continue
                    renderer = item.get('videoRenderer')
                    if not isinstance(renderer, dict):
                        continue
                    if not renderer.get('videoId'):
                        continue

                    yield self._extract_video(renderer)
                    yielded += 1
                    if yielded == n:
                        return

            if not next_token:
                return
            payload['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Return a playlist result wrapping up to *n* entries for *query*."""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 3234
c9ae7b95 3235
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE bound to the ``ytsearchdate`` keyword."""

    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the "params" field of the search request by the parent class.
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3241
c9ae7b95 3242
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Run a search taken from a youtube.com/results URL."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'title': 'youtube-dl test video',
            },
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own ``_VALID_URL`` unchanged.

        NOTE(review): presumably replaces a keyword-based pattern built by
        SearchInfoExtractor - confirm against the base class.
        """
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the query and the optional ``sp`` filter from *url* and search."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        # Stored on the instance so that _entries() picks it up.
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3268
3269
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Shared base for the feed extractors.
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    # _MAX_PAGES = 5
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's ``_FEED_NAME``."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in when the extractor is initialized."""
        self._login()

    def _real_extract(self, url):
        """Delegate the feed page for ``_FEED_NAME`` to YoutubeTabIE."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3290
3291
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ``:ytwatchlater`` shorthand onto the ``WL`` playlist URL."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {
            'url': ':ytwatchlater',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the input URL carries no extra data."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 3304
3305
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``:ytrec`` and the bare youtube.com front page URL."""

    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _FEED_NAME = 'recommended'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {
            'url': ':ytrec',
            'only_matching': True,
        },
        {
            'url': ':ytrecommended',
            'only_matching': True,
        },
        {
            'url': 'https://youtube.com',
            'only_matching': True,
        },
    ]
1ed5b5c9 3320
1ed5b5c9 3321
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ytsubs`` family of shorthands."""

    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {
            'url': ':ytsubs',
            'only_matching': True,
        },
        {
            'url': ':ytsubscriptions',
            'only_matching': True,
        },
    ]
1ed5b5c9 3333
1ed5b5c9 3334
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ythistory`` shorthand."""

    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythistory'
    _TESTS = [
        {
            'url': ':ythistory',
            'only_matching': True,
        },
    ]
1ed5b5c9
JMF
3343
3344
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Match watch URLs that have no video id and report a quoting hint."""

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Verbose-mode pattern: a watch URL with at most one of the listed
    # id-less parameters, or an attribution_link URL, anchored at the end.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?feature=foo',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?hl=en-GB',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?t=2372',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError advising to quote the URL."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
3392
3393
class YoutubeTruncatedIDIE(InfoExtractor):
    """Match watch URLs whose ``v`` value is 1 to 10 characters long and reject them."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the short id."""
        short_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (short_id, url)
        raise ExtractorError(message, expected=True)