]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[downloader] Fix ffmpeg selection for m3u8_native
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
8a784c74 12import time
e0df6211 13import traceback
c5e8d7af 14
b05654f0 15from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 16from ..compat import (
edf3e38e 17 compat_chr,
29f7c58a 18 compat_HTTPError,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c224251a 28 bool_or_none,
c5e8d7af 29 clean_html,
26fe8ffe 30 dict_get,
d92f5d5a 31 datetime_from_str,
358de58c 32 error_to_compat_str,
c5e8d7af 33 ExtractorError,
b60419c5 34 format_field,
2d30521a 35 float_or_none,
dd27fd17 36 int_or_none,
94278f72 37 mimetype2ext,
6310acf5 38 parse_codecs,
7c80519c 39 parse_duration,
dca3ff4a 40 qualities,
3995d37d 41 remove_start,
cf7e015f 42 smuggle_url,
dbdaaa23 43 str_or_none,
c93d53f5 44 str_to_int,
556dbe7f 45 try_get,
c5e8d7af
PH
46 unescapeHTML,
47 unified_strdate,
cf7e015f 48 unsmuggle_url,
8bdd16b4 49 update_url_query,
21c340b8 50 url_or_none,
6e6bc8da 51 urlencode_postdata,
d92f5d5a 52 urljoin
c5e8d7af
PH
53)
54
5f6a1245 55
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # Endpoints used by the multi-step sign-in flow in _login()
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Regex alternation of path components (not used in this class itself)
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
        r'movies|results|shared|hashtag|trending|feed|feeds|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _ids_to_results(self, ids):
        """Return a list of url_result entries (ie 'Youtube'), one per video id in ids."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            # if self._downloader.params.get('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
            #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not a bare return) to honour the documented contract
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST f_req (JSON-encoded) plus the login form fields to url; False on failure."""
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything before the first '[' so the remainder parses as JSON
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # Sent verbatim in both the lookup and the challenge request
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 service_login_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesised: '%' binds tighter than the conditional expression,
            # so without the parentheses the prefix is lost for other messages
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Same precedence fix as for the password warning above
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _initialize_consent(self):
        """
        Set a 'YES' CONSENT cookie for .youtube.com unless one is unnecessary.

        Nothing is done when a __Secure-3PSID cookie is present or when the
        existing CONSENT cookie value already contains 'YES'.
        """
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                return
            # Reuse the numeric id of a pending consent cookie when there is one
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

    def _real_initialize(self):
        """Set up the consent cookie, then attempt to log in if a downloader is attached."""
        self._initialize_consent()
        if self._downloader is None:
            return
        # The result is deliberately ignored: a failed login does not abort initialization
        self._login()

    _YT_WEB_CLIENT_VERSION = '2.20210407.08.00'
    _YT_INNERTUBE_API_KEY = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _generate_sapisidhash_header(self):
        """
        Build the 'SAPISIDHASH <time>_<sha1>' authorization value.

        Returns None when no SAPISID cookie is available for www.youtube.com.
        """
        sapisid_cookie = self._get_cookies('https://www.youtube.com').get('SAPISID')
        if sapisid_cookie is None:
            return
        time_now = round(time.time())
        # SHA-1 over "<timestamp> <SAPISID> <origin>"
        sapisidhash = hashlib.sha1((str(time_now) + " " + sapisid_cookie.value + " " + "https://www.youtube.com").encode("utf-8")).hexdigest()
        return "SAPISIDHASH %s_%s" % (time_now, sapisidhash)

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page',
                  context=None, api_key=None):
        """
        POST a JSON request to https://www.youtube.com/youtubei/v1/<ep>.

        The request body is {'context': ...} updated with query; context and
        api_key fall back to _extract_context() / _extract_api_key() when not
        given. Entries in headers override the generated API headers.
        Returns whatever _download_json returns.
        """
        data = {'context': context or self._extract_context()}
        data.update(query)
        real_headers = self._generate_api_headers()
        real_headers.update({'content-type': 'application/json'})
        if headers:
            real_headers.update(headers)
        return self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep,
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=real_headers,
            query={'key': api_key or self._extract_api_key()})

    def _extract_api_key(self, ytcfg=None):
        """Return ytcfg['INNERTUBE_API_KEY'] if it is a string, else the built-in default key."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str) or self._YT_INNERTUBE_API_KEY

    def _extract_yt_initial_data(self, video_id, webpage):
        """Locate the ytInitialData object in webpage and return it parsed as JSON."""
        return self._parse_json(
            self._search_regex(
                # Try the boundary-anchored pattern first, then the bare one
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_identity_token(self, webpage, item_id):
        """Return ID_TOKEN from the page's ytcfg, falling back to a regex search; None if absent."""
        ytcfg = self._extract_ytcfg(item_id, webpage)
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        return self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)

    @staticmethod
    def _extract_account_syncid(data):
        """
        Extract syncId required to download private playlists of secondary channels
        @param data Either response or ytcfg
        """
        sync_ids = (try_get(
            data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                   lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
        if len(sync_ids) >= 2 and sync_ids[1]:
            # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
            # and just "user_syncid||" for primary channel. We only want the channel_syncid
            return sync_ids[0]
        # ytcfg includes channel_syncid if on secondary channel
        # Guard against data being None/empty instead of raising AttributeError
        return data.get('DELEGATED_SESSION_ID') if data else None

    def _extract_ytcfg(self, video_id, webpage):
        """Return the first ytcfg.set({...}) object found in webpage as a dict ({} if none/unparsable)."""
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}

    def __extract_client_version(self, ytcfg):
        """Return ytcfg['INNERTUBE_CLIENT_VERSION'] if it is a string, else the built-in default."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or self._YT_WEB_CLIENT_VERSION

    def _extract_context(self, ytcfg=None):
        """
        Return the innertube context dict for API requests.

        Uses ytcfg['INNERTUBE_CONTEXT'] when it is a non-empty dict; otherwise
        builds a minimal {'client': {...}} context from ytcfg values/defaults.
        """
        context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict)
        if context:
            return context

        # Recreate the client context (required)
        client_version = self.__extract_client_version(ytcfg)
        client_name = try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str) or 'WEB'
        context = {
            'client': {
                'clientName': client_name,
                'clientVersion': client_version,
            }
        }
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
        return context

    def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None, visitor_data=None):
        """
        Build the HTTP headers dict for API requests.

        Optional headers are added only for the arguments that are truthy; an
        Authorization/X-Origin pair is added when a SAPISIDHASH value can be
        generated.
        """
        headers = {
            'X-YouTube-Client-Name': '1',
            'X-YouTube-Client-Version': self.__extract_client_version(ytcfg),
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
            headers['X-Goog-AuthUser'] = 0
        if visitor_data:
            headers['x-goog-visitor-id'] = visitor_data
        auth = self._generate_sapisidhash_header()
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = 'https://www.youtube.com'
        return headers

    def _extract_video(self, renderer):
        """
        Convert a video renderer dict into a 'url'-type result for YoutubeIE.

        Fields missing from renderer come out as None.
        """
        video_id = renderer.get('videoId')
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Strip all whitespace, then take the leading run of digits/commas
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer,
            (lambda x: x['ownerText']['runs'][0]['text'],
             lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
431
0c148415 432
360e1ca5 433class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 434 IE_DESC = 'YouTube.com'
bc2ca1bb 435 _INVIDIOUS_SITES = (
436 # invidious-redirect websites
437 r'(?:www\.)?redirect\.invidious\.io',
438 r'(?:(?:www|dev)\.)?invidio\.us',
439 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
440 r'(?:www\.)?invidious\.pussthecat\.org',
441 r'(?:www\.)?invidious\.048596\.xyz',
442 r'(?:www\.)?invidious\.zee\.li',
443 r'(?:www\.)?vid\.puffyan\.us',
444 r'(?:(?:www|au)\.)?ytprivate\.com',
445 r'(?:www\.)?invidious\.namazso\.eu',
446 r'(?:www\.)?invidious\.ethibox\.fr',
447 r'(?:www\.)?inv\.skyn3t\.in',
448 r'(?:www\.)?invidious\.himiko\.cloud',
449 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
450 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
451 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
452 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
453 # youtube-dl invidious instances list
454 r'(?:(?:www|no)\.)?invidiou\.sh',
455 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
456 r'(?:www\.)?invidious\.kabi\.tk',
457 r'(?:www\.)?invidious\.13ad\.de',
458 r'(?:www\.)?invidious\.mastodon\.host',
459 r'(?:www\.)?invidious\.zapashcanon\.fr',
460 r'(?:www\.)?invidious\.kavin\.rocks',
461 r'(?:www\.)?invidious\.tube',
462 r'(?:www\.)?invidiou\.site',
463 r'(?:www\.)?invidious\.site',
464 r'(?:www\.)?invidious\.xyz',
465 r'(?:www\.)?invidious\.nixnet\.xyz',
466 r'(?:www\.)?invidious\.drycat\.fr',
467 r'(?:www\.)?tube\.poal\.co',
468 r'(?:www\.)?tube\.connect\.cafe',
469 r'(?:www\.)?vid\.wxzm\.sx',
470 r'(?:www\.)?vid\.mint\.lgbt',
471 r'(?:www\.)?yewtu\.be',
472 r'(?:www\.)?yt\.elukerio\.org',
473 r'(?:www\.)?yt\.lelux\.fi',
474 r'(?:www\.)?invidious\.ggc-project\.de',
475 r'(?:www\.)?yt\.maisputain\.ovh',
476 r'(?:www\.)?invidious\.toot\.koeln',
477 r'(?:www\.)?invidious\.fdn\.fr',
478 r'(?:www\.)?watch\.nettohikari\.com',
479 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
480 r'(?:www\.)?qklhadlycap4cnod\.onion',
481 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
482 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
483 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
484 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
485 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
486 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
487 )
cb7dfeea 488 _VALID_URL = r"""(?x)^
c5e8d7af 489 (
edb53e2d 490 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 491 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
492 (?:www\.)?deturl\.com/www\.youtube\.com|
493 (?:www\.)?pwnyoutube\.com|
494 (?:www\.)?hooktube\.com|
495 (?:www\.)?yourepeat\.com|
496 tube\.majestyc\.net|
497 %(invidious)s|
498 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
499 (?:.*?\#/)? # handle anchor (#/) redirect urls
500 (?: # the various things that can precede the ID:
ac7553d0 501 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 502 |(?: # or the v= param in all its forms
f7000f3a 503 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 504 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 505 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
506 v=
507 )
f4b05232 508 ))
cbaed4bb
S
509 |(?:
510 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
511 vid\.plus| # or vid.plus/xxxx
512 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 513 %(invidious)s
cbaed4bb 514 )/
edb53e2d 515 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 516 )
c5e8d7af 517 )? # all until now is optional -> you can pass the naked ID
8bdd16b4 518 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
519 (?!.*?\blist=
520 (?:
521 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
522 WL # WL are handled by the watch later IE
523 )
524 )
c5e8d7af 525 (?(1).+)? # if we found the ID, everything can follow
bc2ca1bb 526 $""" % {
527 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
528 'invidious': '|'.join(_INVIDIOUS_SITES),
529 }
e40c758c 530 _PLAYER_INFO_RE = (
cc2db878 531 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
532 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 533 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 534 )
2c62dc26 535 _formats = {
c2d3cb4c 536 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
537 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
538 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
539 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
540 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
541 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
542 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
543 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 544 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 545 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
546 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
547 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
548 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
549 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
550 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 551 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 552 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
553 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 554
555
556 # 3D videos
c2d3cb4c 557 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
558 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
559 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
560 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 561 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
562 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
563 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 564
96fb5605 565 # Apple HTTP Live Streaming
11f12195 566 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 567 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
568 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
569 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
570 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
571 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 572 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
573 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
574
575 # DASH mp4 video
d23028a8
S
576 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
577 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
578 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
579 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
580 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 581 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
582 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
583 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
584 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
585 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
586 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
587 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 588
f6f1fc92 589 # Dash mp4 audio
d23028a8
S
590 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
591 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
592 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
593 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
594 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
595 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
596 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
597
598 # Dash webm
d23028a8
S
599 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
600 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
601 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
602 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
603 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
604 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
605 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
606 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
607 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
608 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
609 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
610 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
611 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
612 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
613 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 614 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
615 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
616 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
617 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
618 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
619 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
620 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
621
622 # Dash webm audio
d23028a8
S
623 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
624 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 625
0857baad 626 # Dash webm audio with opus inside
d23028a8
S
627 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
628 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
629 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 630
ce6b9a2d
PH
631 # RTMP (unnamed)
632 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
633
634 # av01 video only formats sometimes served with "unknown" codecs
635 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
636 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
637 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
638 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 639 }
29f7c58a 640 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 641
fd5c4aab
S
642 _GEO_BYPASS = False
643
78caa52a 644 IE_NAME = 'youtube'
2eb88d95
PH
645 _TESTS = [
646 {
2d3d2997 647 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
648 'info_dict': {
649 'id': 'BaW_jenozKc',
650 'ext': 'mp4',
3867038a 651 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
652 'uploader': 'Philipp Hagemeister',
653 'uploader_id': 'phihag',
ec85ded8 654 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
655 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
656 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 657 'upload_date': '20121002',
3867038a 658 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 659 'categories': ['Science & Technology'],
3867038a 660 'tags': ['youtube-dl'],
556dbe7f 661 'duration': 10,
dbdaaa23 662 'view_count': int,
3e7c1224
PH
663 'like_count': int,
664 'dislike_count': int,
7c80519c 665 'start_time': 1,
297a564b 666 'end_time': 9,
2eb88d95 667 }
0e853ca4 668 },
fccd3771 669 {
4bc3a23e
PH
670 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
671 'note': 'Embed-only video (#1746)',
672 'info_dict': {
673 'id': 'yZIXLfi8CZQ',
674 'ext': 'mp4',
675 'upload_date': '20120608',
676 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
677 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
678 'uploader': 'SET India',
94bfcd23 679 'uploader_id': 'setindia',
ec85ded8 680 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 681 'age_limit': 18,
545cc85d 682 },
683 'skip': 'Private video',
fccd3771 684 },
11b56058 685 {
8bdd16b4 686 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
687 'note': 'Use the first video ID in the URL',
688 'info_dict': {
689 'id': 'BaW_jenozKc',
690 'ext': 'mp4',
3867038a 691 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
692 'uploader': 'Philipp Hagemeister',
693 'uploader_id': 'phihag',
ec85ded8 694 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 695 'upload_date': '20121002',
3867038a 696 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 697 'categories': ['Science & Technology'],
3867038a 698 'tags': ['youtube-dl'],
556dbe7f 699 'duration': 10,
dbdaaa23 700 'view_count': int,
11b56058
PM
701 'like_count': int,
702 'dislike_count': int,
34a7de29
S
703 },
704 'params': {
705 'skip_download': True,
706 },
11b56058 707 },
dd27fd17 708 {
2d3d2997 709 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
710 'note': '256k DASH audio (format 141) via DASH manifest',
711 'info_dict': {
712 'id': 'a9LDPn-MO4I',
713 'ext': 'm4a',
714 'upload_date': '20121002',
715 'uploader_id': '8KVIDEO',
ec85ded8 716 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
717 'description': '',
718 'uploader': '8KVIDEO',
719 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 720 },
4bc3a23e
PH
721 'params': {
722 'youtube_include_dash_manifest': True,
723 'format': '141',
4919603f 724 },
de3c7fe0 725 'skip': 'format 141 not served anymore',
dd27fd17 726 },
8bdd16b4 727 # DASH manifest with encrypted signature
728 {
729 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
730 'info_dict': {
731 'id': 'IB3lcPjvWLA',
732 'ext': 'm4a',
733 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
734 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
735 'duration': 244,
736 'uploader': 'AfrojackVEVO',
737 'uploader_id': 'AfrojackVEVO',
738 'upload_date': '20131011',
cc2db878 739 'abr': 129.495,
8bdd16b4 740 },
741 'params': {
742 'youtube_include_dash_manifest': True,
743 'format': '141/bestaudio[ext=m4a]',
744 },
745 },
aa79ac0c
PH
746 # Controversy video
747 {
748 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
749 'info_dict': {
750 'id': 'T4XJQO3qol8',
751 'ext': 'mp4',
556dbe7f 752 'duration': 219,
aa79ac0c 753 'upload_date': '20100909',
4fe54c12 754 'uploader': 'Amazing Atheist',
aa79ac0c 755 'uploader_id': 'TheAmazingAtheist',
ec85ded8 756 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 757 'title': 'Burning Everyone\'s Koran',
545cc85d 758 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 759 }
c522adb1 760 },
dd2d55f1 761 # Normal age-gate video (embed allowed)
c522adb1 762 {
2d3d2997 763 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
764 'info_dict': {
765 'id': 'HtVdAasjOgU',
766 'ext': 'mp4',
767 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 768 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 769 'duration': 142,
c522adb1
JMF
770 'uploader': 'The Witcher',
771 'uploader_id': 'WitcherGame',
ec85ded8 772 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 773 'upload_date': '20140605',
34952f09 774 'age_limit': 18,
c522adb1
JMF
775 },
776 },
8bdd16b4 777 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
778 # YouTube Red ad is not captured for creator
779 {
780 'url': '__2ABJjxzNo',
781 'info_dict': {
782 'id': '__2ABJjxzNo',
783 'ext': 'mp4',
784 'duration': 266,
785 'upload_date': '20100430',
786 'uploader_id': 'deadmau5',
787 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 788 'creator': 'deadmau5',
789 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 790 'uploader': 'deadmau5',
791 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 792 'alt_title': 'Some Chords',
8bdd16b4 793 },
794 'expected_warnings': [
795 'DASH manifest missing',
796 ]
797 },
067aa17e 798 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
799 {
800 'url': 'lqQg6PlCWgI',
801 'info_dict': {
802 'id': 'lqQg6PlCWgI',
803 'ext': 'mp4',
556dbe7f 804 'duration': 6085,
90227264 805 'upload_date': '20150827',
cbe2bd91 806 'uploader_id': 'olympic',
ec85ded8 807 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 808 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 809 'uploader': 'Olympic',
cbe2bd91
PH
810 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
811 },
812 'params': {
813 'skip_download': 'requires avconv',
e52a40ab 814 }
cbe2bd91 815 },
6271f1ca
PH
816 # Non-square pixels
817 {
818 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
819 'info_dict': {
820 'id': '_b-2C3KPAM0',
821 'ext': 'mp4',
822 'stretched_ratio': 16 / 9.,
556dbe7f 823 'duration': 85,
6271f1ca
PH
824 'upload_date': '20110310',
825 'uploader_id': 'AllenMeow',
ec85ded8 826 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 827 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 828 'uploader': '孫ᄋᄅ',
6271f1ca
PH
829 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
830 },
06b491eb
S
831 },
832 # url_encoded_fmt_stream_map is empty string
833 {
834 'url': 'qEJwOuvDf7I',
835 'info_dict': {
836 'id': 'qEJwOuvDf7I',
f57b7835 837 'ext': 'webm',
06b491eb
S
838 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
839 'description': '',
840 'upload_date': '20150404',
841 'uploader_id': 'spbelect',
842 'uploader': 'Наблюдатели Петербурга',
843 },
844 'params': {
845 'skip_download': 'requires avconv',
e323cf3f
S
846 },
847 'skip': 'This live event has ended.',
06b491eb 848 },
067aa17e 849 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
850 {
851 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
852 'info_dict': {
853 'id': 'FIl7x6_3R5Y',
eb6793ba 854 'ext': 'webm',
da77d856
S
855 'title': 'md5:7b81415841e02ecd4313668cde88737a',
856 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 857 'duration': 220,
da77d856
S
858 'upload_date': '20150625',
859 'uploader_id': 'dorappi2000',
ec85ded8 860 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 861 'uploader': 'dorappi2000',
eb6793ba 862 'formats': 'mincount:31',
da77d856 863 },
eb6793ba 864 'skip': 'not actual anymore',
2ee8f5d8 865 },
8a1a26ce
YCH
866 # DASH manifest with segment_list
867 {
868 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
869 'md5': '8ce563a1d667b599d21064e982ab9e31',
870 'info_dict': {
871 'id': 'CsmdDsKjzN8',
872 'ext': 'mp4',
17ee98e1 873 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
874 'uploader': 'Airtek',
875 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
876 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
877 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
878 },
879 'params': {
880 'youtube_include_dash_manifest': True,
881 'format': '135', # bestvideo
be49068d
S
882 },
883 'skip': 'This live event has ended.',
2ee8f5d8 884 },
cf7e015f
S
885 {
886 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 887 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 888 'info_dict': {
545cc85d 889 'id': 'jvGDaLqkpTg',
890 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
891 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
892 },
893 'playlist': [{
894 'info_dict': {
545cc85d 895 'id': 'jvGDaLqkpTg',
cf7e015f 896 'ext': 'mp4',
545cc85d 897 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
898 'description': 'md5:e03b909557865076822aa169218d6a5d',
899 'duration': 10643,
900 'upload_date': '20161111',
901 'uploader': 'Team PGP',
902 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
903 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
904 },
905 }, {
906 'info_dict': {
545cc85d 907 'id': '3AKt1R1aDnw',
cf7e015f 908 'ext': 'mp4',
545cc85d 909 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
910 'description': 'md5:e03b909557865076822aa169218d6a5d',
911 'duration': 10991,
912 'upload_date': '20161111',
913 'uploader': 'Team PGP',
914 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
915 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
916 },
917 }, {
918 'info_dict': {
545cc85d 919 'id': 'RtAMM00gpVc',
cf7e015f 920 'ext': 'mp4',
545cc85d 921 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
922 'description': 'md5:e03b909557865076822aa169218d6a5d',
923 'duration': 10995,
924 'upload_date': '20161111',
925 'uploader': 'Team PGP',
926 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
927 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
928 },
929 }, {
930 'info_dict': {
545cc85d 931 'id': '6N2fdlP3C5U',
cf7e015f 932 'ext': 'mp4',
545cc85d 933 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
934 'description': 'md5:e03b909557865076822aa169218d6a5d',
935 'duration': 10990,
936 'upload_date': '20161111',
937 'uploader': 'Team PGP',
938 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
939 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
940 },
941 }],
942 'params': {
943 'skip_download': True,
944 },
cbaed4bb 945 },
f9f49d87 946 {
067aa17e 947 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
948 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
949 'info_dict': {
950 'id': 'gVfLd0zydlo',
951 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
952 },
953 'playlist_count': 2,
be49068d 954 'skip': 'Not multifeed anymore',
f9f49d87 955 },
cbaed4bb 956 {
2d3d2997 957 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 958 'only_matching': True,
0e49d9a6 959 },
6d4fc66b 960 {
2d3d2997 961 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
962 'only_matching': True,
963 },
0e49d9a6 964 {
067aa17e 965 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 966 # Also tests cut-off URL expansion in video description (see
067aa17e
S
967 # https://github.com/ytdl-org/youtube-dl/issues/1892,
968 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
969 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
970 'info_dict': {
971 'id': 'lsguqyKfVQg',
972 'ext': 'mp4',
973 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 974 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 975 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 976 'duration': 133,
0e49d9a6
LL
977 'upload_date': '20151119',
978 'uploader_id': 'IronSoulElf',
ec85ded8 979 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 980 'uploader': 'IronSoulElf',
eb6793ba
S
981 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
982 'track': 'Dark Walk - Position Music',
983 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 984 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
985 },
986 'params': {
987 'skip_download': True,
988 },
989 },
61f92af1 990 {
067aa17e 991 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
992 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
993 'only_matching': True,
994 },
313dfc45
LL
995 {
996 # Video with yt:stretch=17:0
997 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
998 'info_dict': {
999 'id': 'Q39EVAstoRM',
1000 'ext': 'mp4',
1001 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1002 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1003 'upload_date': '20151107',
1004 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1005 'uploader': 'CH GAMER DROID',
1006 },
1007 'params': {
1008 'skip_download': True,
1009 },
be49068d 1010 'skip': 'This video does not exist.',
313dfc45 1011 },
7caf9830
S
1012 {
1013 # Video licensed under Creative Commons
1014 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1015 'info_dict': {
1016 'id': 'M4gD1WSo5mA',
1017 'ext': 'mp4',
1018 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1019 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1020 'duration': 721,
7caf9830
S
1021 'upload_date': '20150127',
1022 'uploader_id': 'BerkmanCenter',
ec85ded8 1023 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1024 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1025 'license': 'Creative Commons Attribution license (reuse allowed)',
1026 },
1027 'params': {
1028 'skip_download': True,
1029 },
1030 },
fd050249
S
1031 {
1032 # Channel-like uploader_url
1033 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1034 'info_dict': {
1035 'id': 'eQcmzGIKrzg',
1036 'ext': 'mp4',
1037 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1038 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1039 'duration': 4060,
fd050249 1040 'upload_date': '20151119',
eb6793ba 1041 'uploader': 'Bernie Sanders',
fd050249 1042 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1043 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1044 'license': 'Creative Commons Attribution license (reuse allowed)',
1045 },
1046 'params': {
1047 'skip_download': True,
1048 },
1049 },
040ac686
S
1050 {
1051 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1052 'only_matching': True,
7f29cf54
S
1053 },
1054 {
067aa17e 1055 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1056 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1057 'only_matching': True,
6496ccb4
S
1058 },
1059 {
1060 # Rental video preview
1061 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1062 'info_dict': {
1063 'id': 'uGpuVWrhIzE',
1064 'ext': 'mp4',
1065 'title': 'Piku - Trailer',
1066 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1067 'upload_date': '20150811',
1068 'uploader': 'FlixMatrix',
1069 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1070 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1071 'license': 'Standard YouTube License',
1072 },
1073 'params': {
1074 'skip_download': True,
1075 },
eb6793ba 1076 'skip': 'This video is not available.',
022a5d66 1077 },
12afdc2a
S
1078 {
1079 # YouTube Red video with episode data
1080 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1081 'info_dict': {
1082 'id': 'iqKdEhx-dD4',
1083 'ext': 'mp4',
1084 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1085 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1086 'duration': 2085,
12afdc2a
S
1087 'upload_date': '20170118',
1088 'uploader': 'Vsauce',
1089 'uploader_id': 'Vsauce',
1090 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1091 'series': 'Mind Field',
1092 'season_number': 1,
1093 'episode_number': 1,
1094 },
1095 'params': {
1096 'skip_download': True,
1097 },
1098 'expected_warnings': [
1099 'Skipping DASH manifest',
1100 ],
1101 },
c7121fa7
S
1102 {
1103 # The following content has been identified by the YouTube community
1104 # as inappropriate or offensive to some audiences.
1105 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1106 'info_dict': {
1107 'id': '6SJNVb0GnPI',
1108 'ext': 'mp4',
1109 'title': 'Race Differences in Intelligence',
1110 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1111 'duration': 965,
1112 'upload_date': '20140124',
1113 'uploader': 'New Century Foundation',
1114 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1115 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1116 },
1117 'params': {
1118 'skip_download': True,
1119 },
545cc85d 1120 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1121 },
022a5d66
S
1122 {
1123 # itag 212
1124 'url': '1t24XAntNCY',
1125 'only_matching': True,
fd5c4aab
S
1126 },
1127 {
1128 # geo restricted to JP
1129 'url': 'sJL6WA-aGkQ',
1130 'only_matching': True,
1131 },
cd5a74a2
S
1132 {
1133 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1134 'only_matching': True,
1135 },
bc2ca1bb 1136 {
1137 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1138 'only_matching': True,
1139 },
1140 {
1141 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1142 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1143 'only_matching': True,
1144 },
825cd268
RA
1145 {
1146 # DRM protected
1147 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1148 'only_matching': True,
4fe54c12
S
1149 },
1150 {
1151 # Video with unsupported adaptive stream type formats
1152 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1153 'info_dict': {
1154 'id': 'Z4Vy8R84T1U',
1155 'ext': 'mp4',
1156 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1157 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1158 'duration': 433,
1159 'upload_date': '20130923',
1160 'uploader': 'Amelia Putri Harwita',
1161 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1162 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1163 'formats': 'maxcount:10',
1164 },
1165 'params': {
1166 'skip_download': True,
1167 'youtube_include_dash_manifest': False,
1168 },
5429d6a9 1169 'skip': 'not actual anymore',
5caabd3c 1170 },
1171 {
822b9d9c 1172 # Youtube Music Auto-generated description
5caabd3c 1173 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1174 'info_dict': {
1175 'id': 'MgNrAu2pzNs',
1176 'ext': 'mp4',
1177 'title': 'Voyeur Girl',
1178 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1179 'upload_date': '20190312',
5429d6a9
S
1180 'uploader': 'Stephen - Topic',
1181 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1182 'artist': 'Stephen',
1183 'track': 'Voyeur Girl',
1184 'album': 'it\'s too much love to know my dear',
1185 'release_date': '20190313',
1186 'release_year': 2019,
1187 },
1188 'params': {
1189 'skip_download': True,
1190 },
1191 },
66b48727
RA
1192 {
1193 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1194 'only_matching': True,
1195 },
011e75e6
S
1196 {
1197 # invalid -> valid video id redirection
1198 'url': 'DJztXj2GPfl',
1199 'info_dict': {
1200 'id': 'DJztXj2GPfk',
1201 'ext': 'mp4',
1202 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1203 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1204 'upload_date': '20090125',
1205 'uploader': 'Prochorowka',
1206 'uploader_id': 'Prochorowka',
1207 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1208 'artist': 'Panjabi MC',
1209 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1210 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1211 },
1212 'params': {
1213 'skip_download': True,
1214 },
545cc85d 1215 'skip': 'Video unavailable',
ea74e00b
DP
1216 },
1217 {
1218 # empty description results in an empty string
1219 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1220 'info_dict': {
1221 'id': 'x41yOUIvK2k',
1222 'ext': 'mp4',
1223 'title': 'IMG 3456',
1224 'description': '',
1225 'upload_date': '20170613',
1226 'uploader_id': 'ElevageOrVert',
1227 'uploader': 'ElevageOrVert',
1228 },
1229 'params': {
1230 'skip_download': True,
1231 },
1232 },
a0566bbf 1233 {
29f7c58a 1234 # with '};' inside yt initial data (see [1])
1235 # see [2] for an example with '};' inside ytInitialPlayerResponse
1236 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1237 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1238 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1239 'info_dict': {
1240 'id': 'CHqg6qOn4no',
1241 'ext': 'mp4',
1242 'title': 'Part 77 Sort a list of simple types in c#',
1243 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1244 'upload_date': '20130831',
1245 'uploader_id': 'kudvenkat',
1246 'uploader': 'kudvenkat',
1247 },
1248 'params': {
1249 'skip_download': True,
1250 },
1251 },
29f7c58a 1252 {
1253 # another example of '};' in ytInitialData
1254 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1255 'only_matching': True,
1256 },
1257 {
1258 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1259 'only_matching': True,
1260 },
{
    # https://github.com/ytdl-org/youtube-dl/pull/28094
    'url': 'OtqTfy26tG0',
    'info_dict': {
        'id': 'OtqTfy26tG0',
        'ext': 'mp4',
        'title': 'Burn Out',
        'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
        'upload_date': '20141120',
        'uploader': 'The Cinematic Orchestra - Topic',
        'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
        'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
        'artist': 'The Cinematic Orchestra',
        'track': 'Burn Out',
        'album': 'Every Day',
        # Key was misspelled 'release_data', which made this expectation
        # a no-op; the sibling 'release_year': None shows the intent is
        # to assert that no release date is extracted for this video.
        'release_date': None,
        'release_year': None,
    },
    'params': {
        'skip_download': True,
    },
},
bc2ca1bb 1283 {
1284 # controversial video, only works with bpctr when authenticated with cookies
1285 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1286 'only_matching': True,
1287 },
f7ad7160 1288 {
1289 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1290 'url': 'cBvYw8_A0vQ',
1291 'info_dict': {
1292 'id': 'cBvYw8_A0vQ',
1293 'ext': 'mp4',
1294 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1295 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1296 'upload_date': '20201120',
1297 'uploader': 'Walk around Japan',
1298 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1299 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1300 },
1301 'params': {
1302 'skip_download': True,
1303 },
1304 },
2eb88d95
PH
1305 ]
1306
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor and create its two empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _player_cache: signature functions, filled by _decrypt_signature.
    self._player_cache = dict()
    # _code_cache: downloaded player JS, filled by _extract_signature_function.
    self._code_cache = dict()
e0df6211 1311
60064c53
PH
1312 def _signature_cache_id(self, example_sig):
1313 """ Return a string representation of a signature """
78caa52a 1314 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
60064c53 1315
e40c758c
S
1316 @classmethod
1317 def _extract_player_info(cls, player_url):
1318 for player_re in cls._PLAYER_INFO_RE:
1319 id_m = re.search(player_re, player_url)
1320 if id_m:
1321 break
1322 else:
c081b35c 1323 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1324 return id_m.group('id')
e40c758c
S
1325
1326 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 1327 player_id = self._extract_player_info(player_url)
e0df6211 1328
c4417ddb 1329 # Read from filesystem cache
545cc85d 1330 func_id = 'js_%s_%s' % (
1331 player_id, self._signature_cache_id(example_sig))
c4417ddb 1332 assert os.path.basename(func_id) == func_id
a0e07d31 1333
69ea8ca4 1334 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1335 if cache_spec is not None:
78caa52a 1336 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1337
545cc85d 1338 if player_id not in self._code_cache:
1339 self._code_cache[player_id] = self._download_webpage(
e0df6211 1340 player_url, video_id,
545cc85d 1341 note='Downloading player ' + player_id,
69ea8ca4 1342 errnote='Download of %s failed' % player_url)
545cc85d 1343 code = self._code_cache[player_id]
1344 res = self._parse_sig_js(code)
e0df6211 1345
785521bf
PH
1346 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1347 cache_res = res(test_string)
1348 cache_spec = [ord(c) for c in cache_res]
83799698 1349
69ea8ca4 1350 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
83799698
PH
1351 return res
1352
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to func for signatures shaped like example_sig.

    func is run on a probe string to obtain the index permutation it
    applies; that permutation is rendered as a sum of ``s[...]`` index and
    slice expressions and written out with self.to_screen.
    """
    def gen_sig_code(idxs):
        # Yields one 's[...]' expression per run of consecutive indices
        # (step +1 or -1) or per isolated index in idxs.
        def _genslice(start, end, step):
            # Render the inclusive run start..end with the given step as a
            # Python slice; default components are left out.
            starts = '' if start == 0 else str(start)
            # The slice stop is one step past `end`. When that would be
            # negative (a descending run ending at index 0) the stop is
            # left open instead.
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # step is None while no run is in progress, otherwise +1 or -1
        step = None
        # Quell pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    # still inside the current run
                    continue
                # run ended at prev: emit it and start over
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # prev and i are adjacent: open a new run at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Flush whatever is pending for the last index.
        # NOTE(review): `i` is only bound if the loop above ran at least
        # once, i.e. idxs has two or more entries.
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    # Probe string whose k-th character has code point k, so the code
    # points of func's output are the source indices it picked.
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    # Tuple literal of the lengths of the dot-separated signature parts
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1391
e0df6211
PH
1392 def _parse_sig_js(self, jscode):
1393 funcname = self._search_regex(
abefc03f
S
1394 (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1395 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
cc2db878 1396 r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
1397 r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
1398 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
e450f6cb 1399 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
31ce6e99 1400 r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
abefc03f
S
1401 # Obsolete patterns
1402 r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
9a47fa35 1403 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
abefc03f
S
1404 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1405 r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1406 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1407 r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1408 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1409 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
3c90cc8b 1410 jscode, 'Initial JS player signature function name', group='sig')
2b25cb5d
PH
1411
1412 jsi = JSInterpreter(jscode)
1413 initial_function = jsi.extract_function(funcname)
e0df6211
PH
1414 return lambda s: initial_function([s])
1415
545cc85d 1416 def _decrypt_signature(self, s, video_id, player_url):
257a2501 1417 """Turn the encrypted s field into a working signature"""
6b37f0be 1418
c8bf86d5 1419 if player_url is None:
69ea8ca4 1420 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1421
69ea8ca4 1422 if player_url.startswith('//'):
78caa52a 1423 player_url = 'https:' + player_url
3c90cc8b
S
1424 elif not re.match(r'https?://', player_url):
1425 player_url = compat_urlparse.urljoin(
1426 'https://www.youtube.com', player_url)
c8bf86d5 1427 try:
62af3a0e 1428 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1429 if player_id not in self._player_cache:
1430 func = self._extract_signature_function(
60064c53 1431 video_id, player_url, s
c8bf86d5
PH
1432 )
1433 self._player_cache[player_id] = func
1434 func = self._player_cache[player_id]
1435 if self._downloader.params.get('youtube_print_sig_code'):
60064c53 1436 self._print_sig_code(func, s)
c8bf86d5
PH
1437 return func(s)
1438 except Exception as e:
1439 tb = traceback.format_exc()
1440 raise ExtractorError(
78caa52a 1441 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1442
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL from player_response.

    Does nothing when player_response carries no valid
    playbackTracking/videostatsPlaybackUrl/baseUrl. The request is
    non-fatal: it is issued with fatal=False.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # random.randint includes both endpoints. The range must be 0..255 so
    # that `& 63` maps exactly four values onto each of the 64 alphabet
    # characters; an upper bound of 256 gave index 0 an extra share.
    cpn = ''.join(CPN_ALPHABET[random.randint(0, 255) & 63] for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    # True = doseq, because parse_qs yields a list of values per key
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1467
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return every YouTube embed URL or video id found in webpage.

    Three sources are scanned, in this order: embedded player markup
    (iframe/embed/object/SWFObject/data-video-url), lazyYT containers and
    the Wordpress "YouTube Video Importer" plugin. The first yields URLs,
    the other two yield the raw attribute value.
    """
    # Embedded YouTube player
    # The embedSWF alternative used to read `embedSWF\(?:\s*`, which
    # requires a literal ':' after an optional '(' and so never matched
    # `embedSWF("...")`. The colon is now optional; everything that
    # matched before still matches.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:?\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id) for youtube_id in
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns one tuple per match; the last group is the video id
    entries.extend(m[-1] for m in matches)

    return entries
1499
@staticmethod
def _extract_url(webpage):
    """Return the first entry produced by _extract_urls for webpage, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1504
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the second capture group of ``_VALID_URL`` matched against url.

    Raises ExtractorError when url does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1512
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a list of chapter dicts from the chaptered player bar in data.

    Each dict has 'start_time', 'end_time' and 'title'. A chapter ends
    where the next one starts; the last one ends at duration. Chapters
    without a usable start or end time are left out. Returns None when
    data holds no chapter list.
    """
    def _chapter_entries(root):
        return (root['playerOverlays']
                ['playerOverlayRenderer']
                ['decoratedPlayerBarRenderer']
                ['decoratedPlayerBarRenderer']
                ['playerBar']
                ['chapteredPlayerBarRenderer']
                ['chapters'])

    raw_chapters = try_get(data, _chapter_entries, list)
    if not raw_chapters:
        return

    def _start_seconds(entry):
        # timeRangeStartMillis is in milliseconds; scale to seconds
        millis = try_get(
            entry,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    total = len(raw_chapters)
    result = []
    for index, entry in enumerate(raw_chapters):
        begin = _start_seconds(entry)
        if begin is None:
            continue
        following = index + 1
        if following < total:
            finish = _start_seconds(raw_chapters[following])
        else:
            finish = duration
        if finish is None:
            continue
        label = try_get(
            entry, lambda x: x['chapterRenderer']['title']['simpleText'],
            compat_str)
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': label,
        })
    return result
1552
545cc85d 1553 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1554 return self._parse_json(self._search_regex(
1555 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1556 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 1557
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns the result of datetime_from_str('now-<X><units>') or None when
    time_text is missing, is not a string, or has fewer than three
    whitespace-separated words.
    """
    # The caller may have no time text at all; do not crash on None
    if not time_text or not hasattr(time_text, 'split'):
        return None
    # split() (rather than split(' ')) tolerates repeated/leading whitespace
    time_text_split = time_text.split()
    if len(time_text_split) < 3:
        return None
    return datetime_from_str(
        'now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1567
a1c5d2ca
M
@staticmethod
def _join_text_entries(runs):
    """Concatenate the 'text' strings of the dict entries in runs.

    Non-dict entries and entries without a non-empty string 'text' are
    ignored. Returns None when nothing was collected.
    """
    fragments = (
        try_get(run, lambda x: x['text'], compat_str)
        for run in runs if isinstance(run, dict))
    joined = ''.join(fragment for fragment in fragments if fragment)
    return joined or None
1581
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a commentRenderer dict into a comment info dict.

    comment_renderer -- the renderer dict of a single comment
    parent           -- id of the parent comment for replies; stored as
                        'root' when not given

    Returns None when the renderer carries no 'commentId'. 'timestamp' is
    None when the published-time text is missing or cannot be interpreted
    by parse_time_text.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return
    comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
    text = self._join_text_entries(comment_text_runs) or ''
    comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
    time_text = self._join_text_entries(comment_time_text)

    # Both the time text and its parsed form may be absent; previously
    # this raised AttributeError and aborted the whole comment extraction.
    timestamp = None
    if time_text:
        published = self.parse_time_text(time_text)
        if published is not None:
            timestamp = calendar.timegm(published.timetuple())

    author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
    votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                  lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_liked,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
1614
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, session_token_list, parent=None, comment_counts=None):
    """Generate comments by following comment continuations page by page.

    root_continuation_data -- renderer from which the first continuation
                              is extracted
    identity_token, account_syncid, ytcfg
                           -- passed on to the API header generation
    session_token_list     -- one-element list holding the session token;
                              it is updated in place whenever a response
                              carries a new 'xsrf_token'
    parent                 -- id of the parent comment when fetching
                              replies, None for top-level comments
    comment_counts         -- shared list [comments so far, estimated
                              total, current comment thread number]

    Yields comment dicts (as built by _extract_comment). For the top-level
    call the estimated total number of comments is additionally yielded
    once as an int when it could be determined.
    """

    def extract_thread(parent_renderer):
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # `dict` is the expected type here. It used to be passed inside
            # the getter tuple, where it acted as a second getter returning
            # a copy of the source dict.
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    # TODO: Generalize the download code with TabIE
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    first_continuation = False
    if parent is None:
        first_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        retries = self._downloader.params.get('extractor_retries', 3)
        count = -1
        last_error = None

        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = 'Downloading initial comment continuation page'
                    else:
                        note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = '%sDownloading comment%s page %d %s' % (
                        ' ' if parent else '',
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning('Assumed end of comments (received HTTP Error 413)')
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + ' (this API is probably deprecated)'
                    if count < retries:
                        continue
                raise
            else:
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response'])) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if browse.get('reload'):
                    raise ExtractorError('Invalid or missing params in continuation request', expected=False)

                # TODO: not tested, merged from old extractor
                err_msg = browse.get('externalErrorMessage')
                if err_msg:
                    raise ExtractorError('YouTube said: %s' % err_msg, expected=False)

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    self._downloader.report_error(last_error)

        if not response:
            break
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    # The text may not be convertible to an int; only
                    # report and yield the estimate when it is.
                    estimated_total = str_to_int(expected_comment_count)
                    if estimated_total is not None:
                        comment_counts[1] = estimated_total
                        self.to_screen('Downloading ~%d comments' % estimated_total)
                        yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
        else:
            # No known continuation renderer on this page: `continuation`
            # was not advanced, so requesting again would fetch the very
            # same page forever. Stop here instead.
            break
1799
1800 def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
1801 """Entry for comment extraction"""
1802 comments = []
1803 known_entry_comment_renderers = (
1804 'itemSectionRenderer',
1805 )
1806 estimated_total = 0
1807 for entry in contents:
1808 for key, renderer in entry.items():
1809 if key not in known_entry_comment_renderers:
1810 continue
1811
1812 comment_iter = self._comment_entries(
1813 renderer,
1814 identity_token=self._extract_identity_token(webpage, item_id=video_id),
1815 account_syncid=self._extract_account_syncid(ytcfg),
f4f751af 1816 ytcfg=ytcfg,
a1c5d2ca
M
1817 session_token_list=[xsrf_token])
1818
1819 for comment in comment_iter:
1820 if isinstance(comment, int):
1821 estimated_total = comment
1822 continue
1823 comments.append(comment)
1824 break
d92f5d5a 1825 self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
a1c5d2ca
M
1826 return {
1827 'comments': comments,
1828 'comment_count': len(comments),
1829 }
1830
c5e8d7af 1831 def _real_extract(self, url):
cf7e015f 1832 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1833 video_id = self._match_id(url)
1834 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1835 webpage_url = base_url + 'watch?v=' + video_id
1836 webpage = self._download_webpage(
cce889b9 1837 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 1838
1839 player_response = None
1840 if webpage:
1841 player_response = self._extract_yt_initial_variable(
1842 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1843 video_id, 'initial player response')
f4f751af 1844
1845 ytcfg = self._extract_ytcfg(video_id, webpage)
545cc85d 1846 if not player_response:
1847 player_response = self._call_api(
f4f751af 1848 'player', {'videoId': video_id}, video_id, api_key=self._extract_api_key(ytcfg))
545cc85d 1849
1850 playability_status = player_response.get('playabilityStatus') or {}
1851 if playability_status.get('reason') == 'Sign in to confirm your age':
1852 pr = self._parse_json(try_get(compat_parse_qs(
1853 self._download_webpage(
1854 base_url + 'get_video_info', video_id,
1855 'Refetching age-gated info webpage',
1856 'unable to download video info webpage', query={
1857 'video_id': video_id,
7c60c33e 1858 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
545cc85d 1859 }, fatal=False)),
1860 lambda x: x['player_response'][0],
1861 compat_str) or '{}', video_id)
1862 if pr:
1863 player_response = pr
1864
1865 trailer_video_id = try_get(
1866 playability_status,
1867 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1868 compat_str)
1869 if trailer_video_id:
1870 return self.url_result(
1871 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1872
545cc85d 1873 def get_text(x):
1874 if not x:
c2d125d9 1875 return
f7ad7160 1876 text = x.get('simpleText')
1877 if text and isinstance(text, compat_str):
1878 return text
1879 runs = x.get('runs')
1880 if not isinstance(runs, list):
1881 return
1882 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
15be3eb5 1883
545cc85d 1884 search_meta = (
1885 lambda x: self._html_search_meta(x, webpage, default=None)) \
1886 if webpage else lambda x: None
dbdaaa23 1887
545cc85d 1888 video_details = player_response.get('videoDetails') or {}
37357d21 1889 microformat = try_get(
545cc85d 1890 player_response,
1891 lambda x: x['microformat']['playerMicroformatRenderer'],
1892 dict) or {}
1893 video_title = video_details.get('title') \
1894 or get_text(microformat.get('title')) \
1895 or search_meta(['og:title', 'twitter:title', 'title'])
1896 video_description = video_details.get('shortDescription')
cf7e015f 1897
8fe10494 1898 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1899 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1900 multifeed_metadata_list = try_get(
1901 player_response,
1902 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1903 compat_str)
8fe10494
S
1904 if multifeed_metadata_list:
1905 entries = []
1906 feed_ids = []
1907 for feed in multifeed_metadata_list.split(','):
1908 # Unquote should take place before split on comma (,) since textual
1909 # fields may contain comma as well (see
067aa17e 1910 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1911 feed_data = compat_parse_qs(
1912 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1913
1914 def feed_entry(name):
545cc85d 1915 return try_get(
1916 feed_data, lambda x: x[name][0], compat_str)
6b09401b
S
1917
1918 feed_id = feed_entry('id')
1919 if not feed_id:
1920 continue
1921 feed_title = feed_entry('title')
1922 title = video_title
1923 if feed_title:
1924 title += ' (%s)' % feed_title
8fe10494
S
1925 entries.append({
1926 '_type': 'url_transparent',
1927 'ie_key': 'Youtube',
1928 'url': smuggle_url(
545cc85d 1929 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 1930 {'force_singlefeed': True}),
6b09401b 1931 'title': title,
8fe10494 1932 })
6b09401b 1933 feed_ids.append(feed_id)
8fe10494
S
1934 self.to_screen(
1935 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1936 % (', '.join(feed_ids), video_id))
545cc85d 1937 return self.playlist_result(
1938 entries, video_id, video_title, video_description)
8fe10494
S
1939 else:
1940 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1941
545cc85d 1942 formats = []
1943 itags = []
cc2db878 1944 itag_qualities = {}
545cc85d 1945 player_url = None
dca3ff4a 1946 q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
545cc85d 1947 streaming_data = player_response.get('streamingData') or {}
1948 streaming_formats = streaming_data.get('formats') or []
1949 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
1950 for fmt in streaming_formats:
1951 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
1952 continue
321bf820 1953
cc2db878 1954 itag = str_or_none(fmt.get('itag'))
1955 quality = fmt.get('quality')
1956 if itag and quality:
1957 itag_qualities[itag] = quality
1958 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
1959 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
1960 # number of fragment that would subsequently requested with (`&sq=N`)
1961 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
1962 continue
1963
545cc85d 1964 fmt_url = fmt.get('url')
1965 if not fmt_url:
1966 sc = compat_parse_qs(fmt.get('signatureCipher'))
1967 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
1968 encrypted_sig = try_get(sc, lambda x: x['s'][0])
1969 if not (sc and fmt_url and encrypted_sig):
1970 continue
1971 if not player_url:
1972 if not webpage:
1973 continue
1974 player_url = self._search_regex(
1975 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1976 webpage, 'player URL', fatal=False)
1977 if not player_url:
201e9eaa 1978 continue
545cc85d 1979 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
1980 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
1981 fmt_url += '&' + sp + '=' + signature
1982
545cc85d 1983 if itag:
1984 itags.append(itag)
cc2db878 1985 tbr = float_or_none(
1986 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 1987 dct = {
1988 'asr': int_or_none(fmt.get('audioSampleRate')),
1989 'filesize': int_or_none(fmt.get('contentLength')),
1990 'format_id': itag,
1991 'format_note': fmt.get('qualityLabel') or quality,
1992 'fps': int_or_none(fmt.get('fps')),
1993 'height': int_or_none(fmt.get('height')),
dca3ff4a 1994 'quality': q(quality),
cc2db878 1995 'tbr': tbr,
545cc85d 1996 'url': fmt_url,
1997 'width': fmt.get('width'),
1998 }
1999 mimetype = fmt.get('mimeType')
2000 if mimetype:
2001 mobj = re.match(
2002 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
2003 if mobj:
2004 dct['ext'] = mimetype2ext(mobj.group(1))
2005 dct.update(parse_codecs(mobj.group(2)))
cc2db878 2006 no_audio = dct.get('acodec') == 'none'
2007 no_video = dct.get('vcodec') == 'none'
2008 if no_audio:
2009 dct['vbr'] = tbr
2010 if no_video:
2011 dct['abr'] = tbr
2012 if no_audio or no_video:
545cc85d 2013 dct['downloader_options'] = {
2014 # Youtube throttles chunks >~10M
2015 'http_chunk_size': 10485760,
bf1317d2 2016 }
7c60c33e 2017 if dct.get('ext'):
2018 dct['container'] = dct['ext'] + '_dash'
545cc85d 2019 formats.append(dct)
2020
2021 hls_manifest_url = streaming_data.get('hlsManifestUrl')
2022 if hls_manifest_url:
2023 for f in self._extract_m3u8_formats(
2024 hls_manifest_url, video_id, 'mp4', fatal=False):
2025 itag = self._search_regex(
2026 r'/itag/(\d+)', f['url'], 'itag', default=None)
2027 if itag:
2028 f['format_id'] = itag
2029 formats.append(f)
2030
1418a043 2031 if self._downloader.params.get('youtube_include_dash_manifest', True):
545cc85d 2032 dash_manifest_url = streaming_data.get('dashManifestUrl')
2033 if dash_manifest_url:
545cc85d 2034 for f in self._extract_mpd_formats(
2035 dash_manifest_url, video_id, fatal=False):
cc2db878 2036 itag = f['format_id']
2037 if itag in itags:
2038 continue
dca3ff4a 2039 if itag in itag_qualities:
2040 # Not actually usefull since the sorting is already done with "quality,res,fps,codec"
2041 # but kept to maintain feature parity (and code similarity) with youtube-dl
2042 # Remove if this causes any issues with sorting in future
2043 f['quality'] = q(itag_qualities[itag])
545cc85d 2044 filesize = int_or_none(self._search_regex(
2045 r'/clen/(\d+)', f.get('fragment_base_url')
2046 or f['url'], 'file size', default=None))
2047 if filesize:
2048 f['filesize'] = filesize
cc2db878 2049 formats.append(f)
bf1317d2 2050
545cc85d 2051 if not formats:
63ad4d43 2052 if not self._downloader.params.get('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
545cc85d 2053 raise ExtractorError(
2054 'This video is DRM protected.', expected=True)
2055 pemr = try_get(
2056 playability_status,
2057 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2058 dict) or {}
2059 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2060 subreason = pemr.get('subreason')
2061 if subreason:
2062 subreason = clean_html(get_text(subreason))
2063 if subreason == 'The uploader has not made this video available in your country.':
2064 countries = microformat.get('availableCountries')
2065 if not countries:
2066 regions_allowed = search_meta('regionsAllowed')
2067 countries = regions_allowed.split(',') if regions_allowed else None
2068 self.raise_geo_restricted(
2069 subreason, countries)
2070 reason += '\n' + subreason
2071 if reason:
2072 raise ExtractorError(reason, expected=True)
bf1317d2 2073
545cc85d 2074 self._sort_formats(formats)
bf1317d2 2075
545cc85d 2076 keywords = video_details.get('keywords') or []
2077 if not keywords and webpage:
2078 keywords = [
2079 unescapeHTML(m.group('content'))
2080 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2081 for keyword in keywords:
2082 if keyword.startswith('yt:stretch='):
46fff710 2083 stretch_ratio = map(
2084 lambda x: int_or_none(x, default=0),
2085 keyword.split('=')[1].split(':'))
2086 w, h = (list(stretch_ratio) + [0])[:2]
545cc85d 2087 if w > 0 and h > 0:
2088 ratio = w / h
2089 for f in formats:
2090 if f.get('vcodec') != 'none':
2091 f['stretched_ratio'] = ratio
6449cd80 2092
545cc85d 2093 thumbnails = []
2094 for container in (video_details, microformat):
2095 for thumbnail in (try_get(
2096 container,
2097 lambda x: x['thumbnail']['thumbnails'], list) or []):
2098 thumbnail_url = thumbnail.get('url')
2099 if not thumbnail_url:
bf1317d2 2100 continue
1988fab7 2101 # Sometimes youtube gives a wrong thumbnail URL. See:
2102 # https://github.com/yt-dlp/yt-dlp/issues/233
2103 # https://github.com/ytdl-org/youtube-dl/issues/28023
2104 if 'maxresdefault' in thumbnail_url:
2105 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2106 thumbnails.append({
2107 'height': int_or_none(thumbnail.get('height')),
2108 'url': thumbnail_url,
2109 'width': int_or_none(thumbnail.get('width')),
2110 })
2111 if thumbnails:
2112 break
a6211d23 2113 else:
545cc85d 2114 thumbnail = search_meta(['og:image', 'twitter:image'])
2115 if thumbnail:
2116 thumbnails = [{'url': thumbnail}]
2117
2118 category = microformat.get('category') or search_meta('genre')
2119 channel_id = video_details.get('channelId') \
2120 or microformat.get('externalChannelId') \
2121 or search_meta('channelId')
2122 duration = int_or_none(
2123 video_details.get('lengthSeconds')
2124 or microformat.get('lengthSeconds')) \
2125 or parse_duration(search_meta('duration'))
2126 is_live = video_details.get('isLive')
2127 owner_profile_url = microformat.get('ownerProfileUrl')
2128
2129 info = {
2130 'id': video_id,
2131 'title': self._live_title(video_title) if is_live else video_title,
2132 'formats': formats,
2133 'thumbnails': thumbnails,
2134 'description': video_description,
2135 'upload_date': unified_strdate(
2136 microformat.get('uploadDate')
2137 or search_meta('uploadDate')),
2138 'uploader': video_details['author'],
2139 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2140 'uploader_url': owner_profile_url,
2141 'channel_id': channel_id,
2142 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2143 'duration': duration,
2144 'view_count': int_or_none(
2145 video_details.get('viewCount')
2146 or microformat.get('viewCount')
2147 or search_meta('interactionCount')),
2148 'average_rating': float_or_none(video_details.get('averageRating')),
2149 'age_limit': 18 if (
2150 microformat.get('isFamilySafe') is False
2151 or search_meta('isFamilyFriendly') == 'false'
2152 or search_meta('og:restrictions:age') == '18+') else 0,
2153 'webpage_url': webpage_url,
2154 'categories': [category] if category else None,
2155 'tags': keywords,
2156 'is_live': is_live,
2157 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2158 'was_live': video_details.get('isLiveContent'),
545cc85d 2159 }
b477fc13 2160
545cc85d 2161 pctr = try_get(
2162 player_response,
2163 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2164 subtitles = {}
2165 if pctr:
2166 def process_language(container, base_url, lang_code, query):
2167 lang_subs = []
2168 for fmt in self._SUBTITLE_FORMATS:
2169 query.update({
2170 'fmt': fmt,
2171 })
2172 lang_subs.append({
2173 'ext': fmt,
2174 'url': update_url_query(base_url, query),
2175 })
2176 container[lang_code] = lang_subs
7e72694b 2177
545cc85d 2178 for caption_track in (pctr.get('captionTracks') or []):
2179 base_url = caption_track.get('baseUrl')
2180 if not base_url:
2181 continue
2182 if caption_track.get('kind') != 'asr':
2183 lang_code = caption_track.get('languageCode')
2184 if not lang_code:
2185 continue
2186 process_language(
2187 subtitles, base_url, lang_code, {})
2188 continue
2189 automatic_captions = {}
2190 for translation_language in (pctr.get('translationLanguages') or []):
2191 translation_language_code = translation_language.get('languageCode')
2192 if not translation_language_code:
2193 continue
2194 process_language(
2195 automatic_captions, base_url, translation_language_code,
2196 {'tlang': translation_language_code})
2197 info['automatic_captions'] = automatic_captions
2198 info['subtitles'] = subtitles
7e72694b 2199
545cc85d 2200 parsed_url = compat_urllib_parse_urlparse(url)
2201 for component in [parsed_url.fragment, parsed_url.query]:
2202 query = compat_parse_qs(component)
2203 for k, v in query.items():
2204 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2205 d_k += '_time'
2206 if d_k not in info and k in s_ks:
2207 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2208
2209 # Youtube Music Auto-generated description
822b9d9c 2210 if video_description:
38d70284 2211 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2212 if mobj:
822b9d9c
RA
2213 release_year = mobj.group('release_year')
2214 release_date = mobj.group('release_date')
2215 if release_date:
2216 release_date = release_date.replace('-', '')
2217 if not release_year:
545cc85d 2218 release_year = release_date[:4]
2219 info.update({
2220 'album': mobj.group('album'.strip()),
2221 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2222 'track': mobj.group('track').strip(),
2223 'release_date': release_date,
cc2db878 2224 'release_year': int_or_none(release_year),
545cc85d 2225 })
7e72694b 2226
545cc85d 2227 initial_data = None
2228 if webpage:
2229 initial_data = self._extract_yt_initial_variable(
2230 webpage, self._YT_INITIAL_DATA_RE, video_id,
2231 'yt initial data')
2232 if not initial_data:
2233 initial_data = self._call_api(
f4f751af 2234 'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
545cc85d 2235
2236 if not is_live:
2237 try:
2238 # This will error if there is no livechat
2239 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2240 info['subtitles']['live_chat'] = [{
394dcd44 2241 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
545cc85d 2242 'video_id': video_id,
2243 'ext': 'json',
2244 'protocol': 'youtube_live_chat_replay',
2245 }]
2246 except (KeyError, IndexError, TypeError):
2247 pass
2248
2249 if initial_data:
2250 chapters = self._extract_chapters_from_json(
2251 initial_data, video_id, duration)
2252 if not chapters:
2253 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2254 contents = try_get(
2255 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2256 list)
2257 if not contents:
2258 continue
2259
2260 def chapter_time(mmlir):
2261 return parse_duration(
2262 get_text(mmlir.get('timeDescription')))
2263
2264 chapters = []
2265 for next_num, content in enumerate(contents, start=1):
2266 mmlir = content.get('macroMarkersListItemRenderer') or {}
2267 start_time = chapter_time(mmlir)
2268 end_time = chapter_time(try_get(
2269 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2270 if next_num < len(contents) else duration
2271 if start_time is None or end_time is None:
2272 continue
2273 chapters.append({
2274 'start_time': start_time,
2275 'end_time': end_time,
2276 'title': get_text(mmlir.get('title')),
2277 })
2278 if chapters:
2279 break
2280 if chapters:
2281 info['chapters'] = chapters
2282
2283 contents = try_get(
2284 initial_data,
2285 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2286 list) or []
2287 for content in contents:
2288 vpir = content.get('videoPrimaryInfoRenderer')
2289 if vpir:
2290 stl = vpir.get('superTitleLink')
2291 if stl:
2292 stl = get_text(stl)
2293 if try_get(
2294 vpir,
2295 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2296 info['location'] = stl
2297 else:
2298 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2299 if mobj:
2300 info.update({
2301 'series': mobj.group(1),
2302 'season_number': int(mobj.group(2)),
2303 'episode_number': int(mobj.group(3)),
2304 })
2305 for tlb in (try_get(
2306 vpir,
2307 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2308 list) or []):
2309 tbr = tlb.get('toggleButtonRenderer') or {}
2310 for getter, regex in [(
2311 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2312 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2313 lambda x: x['accessibility'],
2314 lambda x: x['accessibilityData']['accessibilityData'],
2315 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2316 label = (try_get(tbr, getter, dict) or {}).get('label')
2317 if label:
2318 mobj = re.match(regex, label)
2319 if mobj:
2320 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2321 break
2322 sbr_tooltip = try_get(
2323 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2324 if sbr_tooltip:
2325 like_count, dislike_count = sbr_tooltip.split(' / ')
2326 info.update({
2327 'like_count': str_to_int(like_count),
2328 'dislike_count': str_to_int(dislike_count),
2329 })
2330 vsir = content.get('videoSecondaryInfoRenderer')
2331 if vsir:
2332 info['channel'] = get_text(try_get(
2333 vsir,
2334 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2335 dict))
545cc85d 2336 rows = try_get(
2337 vsir,
2338 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2339 list) or []
2340 multiple_songs = False
2341 for row in rows:
2342 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2343 multiple_songs = True
2344 break
2345 for row in rows:
2346 mrr = row.get('metadataRowRenderer') or {}
2347 mrr_title = mrr.get('title')
2348 if not mrr_title:
2349 continue
2350 mrr_title = get_text(mrr['title'])
2351 mrr_contents_text = get_text(mrr['contents'][0])
2352 if mrr_title == 'License':
2353 info['license'] = mrr_contents_text
2354 elif not multiple_songs:
2355 if mrr_title == 'Album':
2356 info['album'] = mrr_contents_text
2357 elif mrr_title == 'Artist':
2358 info['artist'] = mrr_contents_text
2359 elif mrr_title == 'Song':
2360 info['track'] = mrr_contents_text
2361
2362 fallbacks = {
2363 'channel': 'uploader',
2364 'channel_id': 'uploader_id',
2365 'channel_url': 'uploader_url',
2366 }
2367 for to, frm in fallbacks.items():
2368 if not info.get(to):
2369 info[to] = info.get(frm)
2370
2371 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2372 v = info.get(s_k)
2373 if v:
2374 info[d_k] = v
b84071c0 2375
c224251a
M
2376 is_private = bool_or_none(video_details.get('isPrivate'))
2377 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2378 is_membersonly = None
b28f8d24 2379 is_premium = None
c224251a
M
2380 if initial_data and is_private is not None:
2381 is_membersonly = False
b28f8d24 2382 is_premium = False
c224251a
M
2383 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2384 for content in contents or []:
2385 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2386 for badge in badges or []:
2387 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2388 if label.lower() == 'members only':
2389 is_membersonly = True
2390 break
b28f8d24
M
2391 elif label.lower() == 'premium':
2392 is_premium = True
2393 break
2394 if is_membersonly or is_premium:
c224251a
M
2395 break
2396
2397 # TODO: Add this for playlists
2398 info['availability'] = self._availability(
2399 is_private=is_private,
b28f8d24 2400 needs_premium=is_premium,
c224251a
M
2401 needs_subscription=is_membersonly,
2402 needs_auth=info['age_limit'] >= 18,
2403 is_unlisted=None if is_private is None else is_unlisted)
2404
06167fbb 2405 # get xsrf for annotations or comments
2406 get_annotations = self._downloader.params.get('writeannotations', False)
2407 get_comments = self._downloader.params.get('getcomments', False)
2408 if get_annotations or get_comments:
29f7c58a 2409 xsrf_token = None
545cc85d 2410 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2411 if ytcfg:
2412 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2413 if not xsrf_token:
2414 xsrf_token = self._search_regex(
2415 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2416 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2417
2418 # annotations
06167fbb 2419 if get_annotations:
64b6a4e9
RA
2420 invideo_url = try_get(
2421 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2422 if xsrf_token and invideo_url:
29f7c58a 2423 xsrf_field_name = None
2424 if ytcfg:
2425 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2426 if not xsrf_field_name:
2427 xsrf_field_name = self._search_regex(
2428 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2429 webpage, 'xsrf field name',
29f7c58a 2430 group='xsrf_field_name', default='session_token')
8a784c74 2431 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2432 self._proto_relative_url(invideo_url),
2433 video_id, note='Downloading annotations',
2434 errnote='Unable to download video annotations', fatal=False,
2435 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2436
277d6ff5 2437 if get_comments:
a1c5d2ca 2438 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)
4ea3be0a 2439
545cc85d 2440 self.mark_watched(video_id, player_response)
d77ab8e2 2441
545cc85d 2442 return info
c5e8d7af 2443
5f6a1245 2444
8bdd16b4 2445class YoutubeTabIE(YoutubeBaseInfoExtractor):
2446 IE_DESC = 'YouTube.com tab'
70d5c17b 2447 _VALID_URL = r'''(?x)
2448 https?://
2449 (?:\w+\.)?
2450 (?:
2451 youtube(?:kids)?\.com|
2452 invidio\.us
2453 )/
2454 (?:
2455 (?:channel|c|user)/|
2456 (?P<not_channel>
9ba5705a 2457 feed/|hashtag/|
70d5c17b 2458 (?:playlist|watch)\?.*?\blist=
2459 )|
29f7c58a 2460 (?!(?:%s)\b) # Direct URLs
70d5c17b 2461 )
2462 (?P<id>[^/?\#&]+)
2463 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2464 IE_NAME = 'youtube:tab'
2465
81127aa5 2466 _TESTS = [{
8bdd16b4 2467 # playlists, multipage
2468 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2469 'playlist_mincount': 94,
2470 'info_dict': {
2471 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2472 'title': 'Игорь Клейнер - Playlists',
2473 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2474 'uploader': 'Игорь Клейнер',
2475 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2476 },
2477 }, {
2478 # playlists, multipage, different order
2479 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2480 'playlist_mincount': 94,
2481 'info_dict': {
2482 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2483 'title': 'Игорь Клейнер - Playlists',
2484 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2485 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2486 'uploader': 'Игорь Клейнер',
8bdd16b4 2487 },
2488 }, {
2489 # playlists, singlepage
2490 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2491 'playlist_mincount': 4,
2492 'info_dict': {
2493 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2494 'title': 'ThirstForScience - Playlists',
2495 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2496 'uploader': 'ThirstForScience',
2497 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2498 }
2499 }, {
2500 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2501 'only_matching': True,
2502 }, {
2503 # basic, single video playlist
0e30a7b9 2504 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2505 'info_dict': {
0e30a7b9 2506 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2507 'uploader': 'Sergey M.',
2508 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2509 'title': 'youtube-dl public playlist',
81127aa5 2510 },
0e30a7b9 2511 'playlist_count': 1,
9291475f 2512 }, {
8bdd16b4 2513 # empty playlist
0e30a7b9 2514 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2515 'info_dict': {
0e30a7b9 2516 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2517 'uploader': 'Sergey M.',
2518 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2519 'title': 'youtube-dl empty playlist',
9291475f
PH
2520 },
2521 'playlist_count': 0,
2522 }, {
8bdd16b4 2523 # Home tab
2524 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2525 'info_dict': {
8bdd16b4 2526 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2527 'title': 'lex will - Home',
2528 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2529 'uploader': 'lex will',
2530 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2531 },
8bdd16b4 2532 'playlist_mincount': 2,
9291475f 2533 }, {
8bdd16b4 2534 # Videos tab
2535 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2536 'info_dict': {
8bdd16b4 2537 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2538 'title': 'lex will - Videos',
2539 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2540 'uploader': 'lex will',
2541 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2542 },
8bdd16b4 2543 'playlist_mincount': 975,
9291475f 2544 }, {
8bdd16b4 2545 # Videos tab, sorted by popular
2546 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2547 'info_dict': {
8bdd16b4 2548 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2549 'title': 'lex will - Videos',
2550 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2551 'uploader': 'lex will',
2552 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2553 },
8bdd16b4 2554 'playlist_mincount': 199,
9291475f 2555 }, {
8bdd16b4 2556 # Playlists tab
2557 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2558 'info_dict': {
8bdd16b4 2559 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2560 'title': 'lex will - Playlists',
2561 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2562 'uploader': 'lex will',
2563 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2564 },
8bdd16b4 2565 'playlist_mincount': 17,
ac7553d0 2566 }, {
8bdd16b4 2567 # Community tab
2568 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2569 'info_dict': {
8bdd16b4 2570 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2571 'title': 'lex will - Community',
2572 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2573 'uploader': 'lex will',
2574 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2575 },
2576 'playlist_mincount': 18,
87dadd45 2577 }, {
8bdd16b4 2578 # Channels tab
2579 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2580 'info_dict': {
8bdd16b4 2581 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2582 'title': 'lex will - Channels',
2583 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2584 'uploader': 'lex will',
2585 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2586 },
deaec5af 2587 'playlist_mincount': 12,
6b08cdf6 2588 }, {
a0566bbf 2589 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2590 'only_matching': True,
2591 }, {
a0566bbf 2592 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2593 'only_matching': True,
2594 }, {
a0566bbf 2595 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2596 'only_matching': True,
2597 }, {
2598 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2599 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2600 'info_dict': {
2601 'title': '29C3: Not my department',
2602 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2603 'uploader': 'Christiaan008',
2604 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2605 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2606 },
2607 'playlist_count': 96,
2608 }, {
2609 'note': 'Large playlist',
2610 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2611 'info_dict': {
8bdd16b4 2612 'title': 'Uploads from Cauchemar',
2613 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2614 'uploader': 'Cauchemar',
2615 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2616 },
8bdd16b4 2617 'playlist_mincount': 1123,
2618 }, {
2619 # even larger playlist, 8832 videos
2620 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2621 'only_matching': True,
4b7df0d3
JMF
2622 }, {
2623 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2624 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2625 'info_dict': {
acf757f4
PH
2626 'title': 'Uploads from Interstellar Movie',
2627 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2628 'uploader': 'Interstellar Movie',
8bdd16b4 2629 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2630 },
481cc733 2631 'playlist_mincount': 21,
358de58c 2632 }, {
2633 'note': 'Playlist with "show unavailable videos" button',
2634 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
2635 'info_dict': {
2636 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
2637 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
2638 'uploader': 'Phim Siêu Nhân Nhật Bản',
2639 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
2640 },
2641 'playlist_mincount': 1400,
2642 'expected_warnings': [
2643 'YouTube said: INFO - Unavailable videos are hidden',
2644 ]
8bdd16b4 2645 }, {
2646 # https://github.com/ytdl-org/youtube-dl/issues/21844
2647 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2648 'info_dict': {
2649 'title': 'Data Analysis with Dr Mike Pound',
2650 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2651 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2652 'uploader': 'Computerphile',
deaec5af 2653 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2654 },
2655 'playlist_mincount': 11,
2656 }, {
a0566bbf 2657 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2658 'only_matching': True,
dacb3a86
S
2659 }, {
2660 # Playlist URL that does not actually serve a playlist
2661 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2662 'info_dict': {
2663 'id': 'FqZTN594JQw',
2664 'ext': 'webm',
2665 'title': "Smiley's People 01 detective, Adventure Series, Action",
2666 'uploader': 'STREEM',
2667 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2668 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2669 'upload_date': '20150526',
2670 'license': 'Standard YouTube License',
2671 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2672 'categories': ['People & Blogs'],
2673 'tags': list,
dbdaaa23 2674 'view_count': int,
dacb3a86
S
2675 'like_count': int,
2676 'dislike_count': int,
2677 },
2678 'params': {
2679 'skip_download': True,
2680 },
13a75688 2681 'skip': 'This video is not available.',
dacb3a86 2682 'add_ie': [YoutubeIE.ie_key()],
481cc733 2683 }, {
8bdd16b4 2684 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2685 'only_matching': True,
66b48727 2686 }, {
8bdd16b4 2687 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2688 'only_matching': True,
a0566bbf 2689 }, {
2690 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2691 'info_dict': {
2692 'id': '9Auq9mYxFEE',
2693 'ext': 'mp4',
deaec5af 2694 'title': compat_str,
a0566bbf 2695 'uploader': 'Sky News',
2696 'uploader_id': 'skynews',
2697 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2698 'upload_date': '20191102',
deaec5af 2699 'description': 'md5:85ddd75d888674631aaf9599a9a0b0ae',
a0566bbf 2700 'categories': ['News & Politics'],
2701 'tags': list,
2702 'like_count': int,
2703 'dislike_count': int,
2704 },
2705 'params': {
2706 'skip_download': True,
2707 },
2708 }, {
2709 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2710 'info_dict': {
2711 'id': 'a48o2S1cPoo',
2712 'ext': 'mp4',
2713 'title': 'The Young Turks - Live Main Show',
2714 'uploader': 'The Young Turks',
2715 'uploader_id': 'TheYoungTurks',
2716 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2717 'upload_date': '20150715',
2718 'license': 'Standard YouTube License',
2719 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2720 'categories': ['News & Politics'],
2721 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2722 'like_count': int,
2723 'dislike_count': int,
2724 },
2725 'params': {
2726 'skip_download': True,
2727 },
2728 'only_matching': True,
2729 }, {
2730 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2731 'only_matching': True,
2732 }, {
2733 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2734 'only_matching': True,
3d3dddc9 2735 }, {
2736 'url': 'https://www.youtube.com/feed/trending',
2737 'only_matching': True,
2738 }, {
2739 # needs auth
2740 'url': 'https://www.youtube.com/feed/library',
2741 'only_matching': True,
2742 }, {
2743 # needs auth
2744 'url': 'https://www.youtube.com/feed/history',
2745 'only_matching': True,
2746 }, {
2747 # needs auth
2748 'url': 'https://www.youtube.com/feed/subscriptions',
2749 'only_matching': True,
2750 }, {
2751 # needs auth
2752 'url': 'https://www.youtube.com/feed/watch_later',
2753 'only_matching': True,
2754 }, {
2755 # no longer available?
2756 'url': 'https://www.youtube.com/feed/recommended',
2757 'only_matching': True,
29f7c58a 2758 }, {
2759 # inline playlist with not always working continuations
2760 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2761 'only_matching': True,
2762 }, {
2763 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2764 'only_matching': True,
2765 }, {
2766 'url': 'https://www.youtube.com/course',
2767 'only_matching': True,
2768 }, {
2769 'url': 'https://www.youtube.com/zsecurity',
2770 'only_matching': True,
2771 }, {
2772 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2773 'only_matching': True,
2774 }, {
2775 'url': 'https://www.youtube.com/TheYoungTurks/live',
2776 'only_matching': True,
39ed931e 2777 }, {
2778 'url': 'https://www.youtube.com/hashtag/cctv9',
2779 'info_dict': {
2780 'id': 'cctv9',
2781 'title': '#cctv9',
2782 },
2783 'playlist_mincount': 350,
29f7c58a 2784 }]
2785
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    Any URL accepted by YoutubeIE is rejected here; everything else is
    decided by the parent class implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2790
2791 def _extract_channel_id(self, webpage):
2792 channel_id = self._html_search_meta(
2793 'channelId', webpage, 'channel id', default=None)
2794 if channel_id:
2795 return channel_id
2796 channel_url = self._html_search_meta(
2797 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2798 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2799 'twitter:app:url:googleplay'), webpage, 'channel url')
2800 return self._search_regex(
2801 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2802 channel_url, 'channel id')
15f6397c 2803
8bdd16b4 2804 @staticmethod
cd7c66cf 2805 def _extract_basic_item_renderer(item):
2806 # Modified from _extract_grid_item_renderer
2807 known_renderers = (
e3c07697 2808 'playlistRenderer', 'videoRenderer', 'channelRenderer',
cd7c66cf 2809 'gridPlaylistRenderer', 'gridVideoRenderer', 'gridChannelRenderer'
2810 )
2811 for key, renderer in item.items():
2812 if key not in known_renderers:
2813 continue
2814 return renderer
8bdd16b4 2815
def _grid_entries(self, grid_renderer):
    """Yield results for every supported item in ``grid_renderer['items']``.

    A single renderer may produce a playlist result, a video entry and a
    channel result, depending on which of ``playlistId``, ``videoId`` and
    ``channelId`` it carries.  Non-dict items and items without a known
    renderer are skipped.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        item_renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(item_renderer, dict):
            continue
        runs_title = try_get(
            item_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)

        # playlist
        list_id = item_renderer.get('playlistId')
        if list_id:
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % list_id
            yield self.url_result(
                playlist_url, ie=YoutubeTabIE.ie_key(),
                video_id=list_id, video_title=runs_title)

        # video
        if item_renderer.get('videoId'):
            yield self._extract_video(item_renderer)

        # channel
        chan_id = item_renderer.get('channelId')
        if chan_id:
            simple_title = try_get(
                item_renderer, lambda x: x['title']['simpleText'], compat_str)
            channel_url = 'https://www.youtube.com/channel/%s' % chan_id
            yield self.url_result(
                channel_url, ie=YoutubeTabIE.ie_key(), video_title=simple_title)
2844
3d3dddc9 2845 def _shelf_entries_from_content(self, shelf_renderer):
2846 content = shelf_renderer.get('content')
2847 if not isinstance(content, dict):
8bdd16b4 2848 return
cd7c66cf 2849 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 2850 if renderer:
2851 # TODO: add support for nested playlists so each shelf is processed
2852 # as separate playlist
2853 # TODO: this includes only first N items
2854 for entry in self._grid_entries(renderer):
2855 yield entry
2856 renderer = content.get('horizontalListRenderer')
2857 if renderer:
2858 # TODO
2859 pass
8bdd16b4 2860
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf (if it has one) plus its content.

    With *skip_channels* set, a shelf whose URL contains ``/channels?``
    yields nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Links to other channels are skipped by inspecting the URL; checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType ==
        # WEB_PAGE_TYPE_CHANNEL will not work
        links_to_channels = '/channels?' in shelf_url
        if skip_channels and links_to_channels:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for content_entry in self._shelf_entries_from_content(shelf_renderer):
        yield content_entry
c5e8d7af 2878
8bdd16b4 2879 def _playlist_entries(self, video_list_renderer):
2880 for content in video_list_renderer['contents']:
2881 if not isinstance(content, dict):
2882 continue
2883 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2884 if not isinstance(renderer, dict):
2885 continue
2886 video_id = renderer.get('videoId')
2887 if not video_id:
2888 continue
2889 yield self._extract_video(renderer)
07aeced6 2890
def _rich_entries(self, rich_grid_renderer):
    """Yield the video entry nested under ``content.videoRenderer``, if any.

    Nothing is yielded when the renderer is missing or has no ``videoId``.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
    if video_renderer and video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
2898
8bdd16b4 2899 def _video_entry(self, video_renderer):
2900 video_id = video_renderer.get('videoId')
2901 if video_id:
2902 return self._extract_video(video_renderer)
dacb3a86 2903
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos referenced by a community post.

    The attached video (``backstageAttachment.videoRenderer``) comes first,
    followed by every link in the post text that YoutubeIE accepts.  A link
    pointing at the attached video is not yielded a second time.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return
    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
    video_id = None
    if video_renderer:
        # Remember the attachment's id so the duplicate check below can work;
        # previously it stayed None and the check could never match.
        video_id = video_renderer.get('videoId')
        entry = self._video_entry(video_renderer)
        if entry:
            yield entry
    # inline video links
    runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
    for run in runs:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url:
            continue
        if not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if video_id == ep_video_id:
            continue
        # Report the id of the linked video, not the attachment's id.
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 2932
8bdd16b4 2933 def _post_thread_continuation_entries(self, post_thread_continuation):
2934 contents = post_thread_continuation.get('contents')
2935 if not isinstance(contents, list):
2936 return
2937 for content in contents:
2938 renderer = content.get('backstagePostThreadRenderer')
2939 if not isinstance(renderer, dict):
2940 continue
2941 for entry in self._post_thread_entries(renderer):
2942 yield entry
07aeced6 2943
39ed931e 2944 r''' # unused
2945 def _rich_grid_entries(self, contents):
2946 for content in contents:
2947 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
2948 if video_renderer:
2949 entry = self._video_entry(video_renderer)
2950 if entry:
2951 yield entry
2952 '''
2953
29f7c58a 2954 @staticmethod
2955 def _build_continuation_query(continuation, ctp=None):
2956 query = {
2957 'ctoken': continuation,
2958 'continuation': continuation,
2959 }
2960 if ctp:
2961 query['itct'] = ctp
2962 return query
2963
@staticmethod
def _extract_next_continuation_data(renderer):
    """Return a continuation query built from ``continuations[0].nextContinuationData``.

    None is returned when that data or its ``continuation`` token is missing.
    """
    data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict) or {}
    token = data.get('continuation')
    if token:
        return YoutubeTabIE._build_continuation_query(
            token, data.get('clickTrackingParams'))
    return None
c5e8d7af 2975
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or None.

    ``nextContinuationData`` is tried first.  Otherwise the renderer's
    ``contents`` and ``items`` lists are scanned, in that order, for the
    first ``continuationItemRenderer`` carrying a continuation token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query
    candidates = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate,
            lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
    return None
448830ce 2998
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield every entry of *tab*, following continuations page by page.

    The entries found in the tab's own content are yielded first.  Further
    pages are then requested through ``_extract_response`` for as long as a
    continuation can be found in the previous page.

    *item_id* is only used to label the page requests; *identity_token*,
    *account_syncid* and *ytcfg* are forwarded to the header/response helpers.
    """

    def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
        # Records the continuation it discovers in continuation_list[0]
        # as a side effect while yielding entries.
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                # Only the first known renderer of each item is processed.
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    for page_num in itertools.count(1):
        if not continuation:
            break
        query = {
            'continuation': continuation['continuation'],
        }
        # 'itct' is only present when the continuation carried
        # clickTrackingParams; indexing it unconditionally raised KeyError.
        click_tracking_params = continuation.get('itct')
        if click_tracking_params:
            query['clickTracking'] = {'clickTrackingParams': click_tracking_params}
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=query, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep the previous visitor data when the response supplies none.
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Reset so extract_entries (if used) reports this page's continuation.
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Each value is (entry generator, key under which it expects the items).
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The type of the first item decides how the whole page is handled.
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Nothing recognisable in this page: stop paging.
        break
9558dcec 3118
@staticmethod
def _extract_selected_tab(tabs):
    """Return the ``tabRenderer`` of the first tab marked as selected.

    Raises ExtractorError when no tab in *tabs* is selected.
    """
    for candidate in tabs:
        is_selected = try_get(
            candidate, lambda x: x['tabRenderer']['selected'], bool)
        if is_selected:
            return candidate['tabRenderer']
    raise ExtractorError('Unable to find selected tab')
b82f815f 3126
@staticmethod
def _extract_uploader(data):
    """Collect uploader fields from the playlist sidebar of *data*.

    Returns a dict with whichever of ``uploader``, ``uploader_id`` and
    ``uploader_url`` could be determined; keys whose value is None are
    left out.  When several sidebar items name an owner, the last one wins.
    """
    found = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    for sidebar_item in sidebar_items or []:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info,
            lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0],
            dict)
        if not owner:
            continue
        found['uploader'] = owner.get('text')
        found['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        base_url = try_get(
            owner,
            lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'],
            compat_str)
        found['uploader_url'] = urljoin('https://www.youtube.com/', base_url)
    return {key: value for key, value in found.items() if value is not None}
8bdd16b4 3149
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build a playlist result from the selected tab of a tabbed page.

    Metadata is taken from the channel metadata renderer when present,
    otherwise from the playlist metadata renderer; ``item_id`` is used as
    the playlist id (and title) of last resort.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    playlist_id = title = description = None
    tags = []
    raw_thumbnails = []

    meta = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if meta:
        channel_name = meta.get('title')
        channel_url = meta.get('channelUrl')
        channel_id = meta.get('externalId')
    else:
        meta = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if meta:
        title = meta.get('title')
        description = meta.get('description', '')
        playlist_id = channel_id
        tags = meta.get('keywords', '').split()
        raw_thumbnails = (
            try_get(meta, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    # Keep only the thumbnail entries that carry a usable URL
    thumbnails = []
    for thumb in raw_thumbnails:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(
            data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    title = title + format_field(selected_tab, 'title', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        # No channel metadata: fall back to the owner shown in the sidebar
        metadata.update(self._extract_uploader(data))
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    entries = self._entries(
        selected_tab, playlist_id,
        self._extract_identity_token(webpage, item_id),
        self._extract_account_syncid(data),
        self._extract_ytcfg(item_id, webpage))
    return self.playlist_result(entries, **metadata)
73c4ac2c 3221
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the entries of a Mix playlist, requesting further pages as needed.

    Each page is read from ``playlist``; the following page is requested
    from the ``next`` endpoint, continuing after the last video seen.
    Iteration stops when a page is empty, contains nothing after the last
    video already yielded, repeats the very first video, or when the
    response carries no playlist at all.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last video already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The endpoint may be missing; fall back to an empty dict so that the
        # defaults below are used instead of failing on ``None.get``
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # Nothing to iterate over on the next round
            return
cd7c66cf 3260
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Handle a playlist shown next to a video on a watch page.

    Anything that has its own playlist URL is handed over to YoutubeTabIE;
    only mix playlists are extracted here.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, relative_url)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3278
f3eaa8dd
M
def _extract_alerts(self, data, expected=False):
    """Report the alerts contained in ``data`` and raise on an error alert.

    Every alert whose type is not ``error`` (case-insensitive) is reported
    as a warning, as is every error alert except the last one.  The last
    error alert is raised as an ExtractorError whose ``expected`` flag is
    taken from the ``expected`` argument.
    """

    def _real_extract_alerts():
        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_dict, dict):
                continue
            for alert in alert_dict.values():
                if not isinstance(alert, dict):
                    continue
                alert_type = alert.get('type')
                if not alert_type:
                    continue
                # The text comes either as a single string or as a list of
                # runs; yield each alert exactly once
                message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
                if not message:
                    runs = try_get(alert, lambda x: x['text']['runs'], list) or []
                    message = ''.join(
                        try_get(run, lambda x: x['text'], compat_str) or ''
                        for run in runs)
                if message:
                    yield alert_type, message

    errors = []
    warnings = []
    for alert_type, alert_message in _real_extract_alerts():
        if alert_type.lower() == 'error':
            errors.append([alert_type, alert_message])
        else:
            warnings.append([alert_type, alert_message])

    # Only the last error is fatal; the earlier ones are merely reported
    for alert_type, alert_message in (warnings + errors[:-1]):
        self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
    if errors:
        raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
02ced43c 3309
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None when there is no such button or when the button lacks
    the browse id or params needed for the request.
    """
    def _navigation_items():
        # Walk the menu entries of every primary info renderer in the sidebar
        sidebar_items = try_get(
            data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
        for sidebar_item in sidebar_items:
            if not isinstance(sidebar_item, dict):
                continue
            primary = sidebar_item.get('playlistSidebarPrimaryInfoRenderer')
            menu_entries = try_get(
                primary, lambda x: x['menu']['menuRenderer']['items'], list) or []
            for menu_entry in menu_entries:
                if isinstance(menu_entry, dict):
                    yield menu_entry.get('menuNavigationItemRenderer')

    for nav_item in _navigation_items():
        label = try_get(
            nav_item, lambda x: x['text']['simpleText'], compat_str)
        if label and label.lower() == 'show unavailable videos':
            break
    else:
        return None

    endpoint = try_get(
        nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
    browse_id = endpoint.get('browseId')
    params = endpoint.get('params')
    if not (browse_id and params):
        return None

    ytcfg = self._extract_ytcfg(item_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=try_get(
            self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    return self._extract_response(
        item_id=item_id, headers=headers,
        query={'params': params, 'browseId': browse_id},
        check_get_keys='contents', fatal=False,
        note='Downloading API JSON with unavailable videos')
3350
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True):
    """Call the API endpoint ``ep`` and retry on transient failures.

    A request is retried (up to the ``extractor_retries`` param, default 3)
    on HTTP 500/503/404 and when none of ``check_get_keys`` is present in
    the response.  Once the retries are used up, or on any other
    ExtractorError, the failure is raised if ``fatal`` is true; otherwise
    it is reported as a warning and None is returned.
    """
    retries = self._downloader.params.get('extractor_retries', 3)
    check_get_keys = [] if check_get_keys is None else check_get_keys
    response = last_error = None
    attempt = -1
    while attempt < retries:
        attempt += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg),
                api_key=self._extract_api_key(ytcfg),
                note='%s%s' % (note, ' (retry #%d)' % attempt if attempt else ''))
        except ExtractorError as e:
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            is_transient = isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404)
            if is_transient:
                last_error = 'HTTP Error %s' % e.cause.code
                if attempt < retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return None

        # Youtube may send alerts if there was an issue with the continuation page
        self._extract_alerts(response, expected=False)
        if not check_get_keys or dict_get(response, check_get_keys):
            break
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        last_error = 'Incomplete data received'
        if attempt < retries:
            continue
        if fatal:
            raise ExtractorError(last_error)
        self.report_warning(last_error)
        return None
    return response
3398
def _extract_webpage(self, url, item_id):
    """Download ``url`` and return ``(webpage, data)``.

    ``data`` is the yt initial data parsed from the page.  The download is
    repeated (up to the ``extractor_retries`` param, default 3) while the
    data has neither ``contents`` nor ``currentVideoEndpoint``; if it is
    still incomplete after the last attempt an error is reported and the
    last result is returned anyway.
    """
    last_error = 'Incomplete yt initial data recieved'
    retries = self._downloader.params.get('extractor_retries', 3)
    attempt = -1
    while attempt < retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if attempt:
            self.report_warning('%s. Retrying ...' % last_error)
        note = 'Downloading webpage%s' % (' (retry #%d)' % attempt if attempt else '')
        webpage = self._download_webpage(url, item_id, note)
        data = self._extract_yt_initial_data(item_id, webpage)
        self._extract_alerts(data, expected=True)
        if any(data.get(key) for key in ('contents', 'currentVideoEndpoint')):
            break
        if attempt >= retries:
            self._downloader.report_error(last_error)
    return webpage, data
3419
def _real_extract(self, url):
    """Extract a tab, playlist or single video from a YouTube page URL.

    The host is normalised to www.youtube.com, bare channel URLs are
    redirected to their ``/videos`` tab, and the downloaded page is then
    handled as a tabbed page, a watch-page playlist or a single video,
    in that order.
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))

    # This is not matched in a channel page with a tab selected
    match = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
    groups = match.groupdict() if match else {}
    if groups and not groups.get('not_channel'):
        self._downloader.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        url = '%s/videos%s' % (groups.get('pre'), groups.get('post') or '')

    # Handle both video/playlist URLs
    qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    is_watch_url = (groups.get('not_channel') or '').startswith('watch')
    if not video_id and is_watch_url:
        if not playlist_id:
            # If there is neither video or playlist ids,
            # youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        self._downloader.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id

    if video_id and playlist_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    reloaded = self._reload_with_unavailable_videos(item_id, data, webpage)
    data = reloaded or data

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    current_video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str)
    video_id = current_video_id or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    self._downloader.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
    return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 3476
c5e8d7af 3477
class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist ids and URLs with a ``list=`` parameter that
    # YoutubeTabIE does not claim, and forwards them to YoutubeTabIE.
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Anything YoutubeTabIE can handle is left to it
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        # Keep the original query if there is one; a bare id has none
        query = (
            compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
            or {'list': playlist_id})
        target = update_url_query('https://www.youtube.com/playlist', query)
        return self.url_result(
            target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3552
3553
class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that also carry a playlist id; rewritten to the
    # equivalent watch URL and passed on to YoutubeTabIE.
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3592
3593
class YoutubeYtUserIE(InfoExtractor):
    # "ytuser:<name>" shortcut for the user page of <name>
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3607
b05654f0 3608
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # ":ytfav" shortcut; resolves to the "LL" playlist handled by YoutubeTabIE
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
3626
3627
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to ``n`` video results for ``query``, page by page."""
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        found = 0
        for page_num in itertools.count(1):
            page = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not page:
                return
            sections = try_get(
                page,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list) or []
                for item in items:
                    renderer = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not isinstance(renderer, dict) or not renderer.get('videoId'):
                        continue

                    yield self._extract_video(renderer)
                    found += 1
                    if found == n:
                        return

            if not next_token:
                return
            request['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 3697
c9ae7b95 3698
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same as YoutubeSearchIE, but with the search params for newest-first ordering
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3704
c9ae7b95 3705
class YoutubeSearchURLIE(YoutubeSearchIE):
    # Search result page URLs; the query and the optional "sp" filter are
    # read from the URL's query string.
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = '%s_url' % YoutubeSearchIE.IE_NAME
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        return cls._VALID_URL

    def _real_extract(self, url):
        url_params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        query_values = url_params.get('search_query') or url_params.get('q')
        query = query_values[0]
        self._SEARCH_PARAMS = url_params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3731
3732
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        feed_name = self._FEED_NAME
        return 'youtube:%s' % feed_name

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        # The feed page itself is extracted by YoutubeTabIE
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3752
3753
class YoutubeWatchLaterIE(InfoExtractor):
    # ":ytwatchlater" shortcut; resolves to the "WL" playlist handled by YoutubeTabIE
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 3766
3767
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Matches the bare youtube.com home page as well as the ":ytrec" shortcut
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 3782
1ed5b5c9 3783
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # ":ytsubs" shortcut for the subscriptions feed
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 3795
1ed5b5c9 3796
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # ":ythis" / ":ythistory" shortcut for the history feed
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
3805
3806
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch URLs that end right after a non-video parameter (or an
    # attribution link with a single parameter) and explains the likely
    # cause instead of extracting anything.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
3854
3855
class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose video id is shorter than 11 characters
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)