]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[youtube:tab] Reload with unavailable videos for all playlists
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
8a784c74 12import time
e0df6211 13import traceback
c5e8d7af 14
b05654f0 15from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 16from ..compat import (
edf3e38e 17 compat_chr,
29f7c58a 18 compat_HTTPError,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c224251a 28 bool_or_none,
c5e8d7af 29 clean_html,
26fe8ffe 30 dict_get,
d92f5d5a 31 datetime_from_str,
358de58c 32 error_to_compat_str,
c5e8d7af 33 ExtractorError,
b60419c5 34 format_field,
2d30521a 35 float_or_none,
dd27fd17 36 int_or_none,
94278f72 37 mimetype2ext,
6310acf5 38 parse_codecs,
7c80519c 39 parse_duration,
dca3ff4a 40 qualities,
3995d37d 41 remove_start,
cf7e015f 42 smuggle_url,
dbdaaa23 43 str_or_none,
c93d53f5 44 str_to_int,
556dbe7f 45 try_get,
c5e8d7af
PH
46 unescapeHTML,
47 unified_strdate,
cf7e015f 48 unsmuggle_url,
8bdd16b4 49 update_url_query,
21c340b8 50 url_or_none,
6e6bc8da 51 urlencode_postdata,
d92f5d5a 52 urljoin
c5e8d7af
PH
53)
54
5f6a1245 55
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Google account endpoints used by the login flow in _login()
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # '{0}' is filled with the TL token extracted from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
        r'movies|results|shared|hashtag|trending|feed|feeds|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _ids_to_results(self, ids):
        """Return a list of 'Youtube' url_result entries, one per video id in ids."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            # TODO: consider reminding users who pass a cookiefile to keep their cookies up to date
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not a bare return) to honour the documented contract
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST the hidden login form fields plus f_req (JSON-encoded) to url.

            Returns the parsed JSON response, or False on failure (fatal=False).
            """
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything before the first '[' so the remainder parses as JSON
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self.report_warning(message)

        # Flow description sent unchanged with both the lookup and the
        # challenge request; it is only serialized, never mutated.
        flow_context = [
            None, None,
            [2, 1, None, 1,
             'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
             None, [], 4],
            1, [None, None, []], None, None, None, True
        ]

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            flow_context,
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            flow_context]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Explicit False (not a bare return) to honour the documented contract
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesized so the prefix is applied to every message; without
            # the parentheses '%' binds tighter than the conditional expression
            # and any other error would be reported without the prefix.
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Same precedence fix as for the password message above
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _initialize_consent(self):
        """Set a 'YES' CONSENT cookie on .youtube.com unless one is already in place.

        Nothing is done when a '__Secure-3PSID' cookie exists or when the
        existing CONSENT cookie already contains 'YES'. The numeric id of a
        'PENDING+<id>' CONSENT cookie is reused when present; otherwise a
        random three-digit id is generated.
        """
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                return
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

    def _real_initialize(self):
        """Prepare the consent cookie and, when a downloader is attached, attempt to log in."""
        self._initialize_consent()
        if self._downloader is None:
            return
        # The result is deliberately ignored: _login() reports its own
        # failures as warnings and initialization carries on either way.
        self._login()

    # Fallback values used when they cannot be read from a page's ytcfg
    _YT_WEB_CLIENT_VERSION = '2.20210407.08.00'
    _YT_INNERTUBE_API_KEY = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _generate_sapisidhash_header(self):
        """Build a 'SAPISIDHASH <time>_<sha1>' value from the SAPISID cookie.

        The SHA-1 digest covers '<time> <SAPISID value> https://www.youtube.com'.
        Returns None when no SAPISID cookie is set for https://www.youtube.com.
        """
        sapisid_cookie = self._get_cookies('https://www.youtube.com').get('SAPISID')
        if sapisid_cookie is None:
            return
        time_now = round(time.time())
        hash_input = ' '.join(
            (str(time_now), sapisid_cookie.value, 'https://www.youtube.com'))
        sapisidhash = hashlib.sha1(hash_input.encode('utf-8')).hexdigest()
        return 'SAPISIDHASH %s_%s' % (time_now, sapisidhash)

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page',
                  context=None, api_key=None):
        """POST a JSON request to https://www.youtube.com/youtubei/v1/<ep>.

        @param ep        Endpoint path appended to the youtubei/v1 base URL
        @param query     Dict merged into the request body next to 'context'
        @param video_id  Passed through to _download_json
        @param headers   Extra headers; they override the generated API headers
        @param context   Client context; _extract_context() is used when falsy
        @param api_key   API key; _extract_api_key() is used when falsy
        @returns         The result of _download_json
        """
        data = {'context': context or self._extract_context()}
        data.update(query)
        real_headers = self._generate_api_headers()
        real_headers.update({'content-type': 'application/json'})
        if headers:
            real_headers.update(headers)
        return self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep,
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=real_headers,
            query={'key': api_key or self._extract_api_key()})

    def _extract_api_key(self, ytcfg=None):
        """Return ytcfg['INNERTUBE_API_KEY'] if it is a string, else the built-in default key."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str) or self._YT_INNERTUBE_API_KEY

    def _extract_yt_initial_data(self, video_id, webpage):
        """Find the ytInitialData JSON object in webpage and return it parsed.

        The pattern anchored by a trailing boundary is tried first; the bare
        pattern is the fallback.
        """
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_identity_token(self, webpage, item_id):
        """Return the ID_TOKEN from the page's ytcfg, or from a regex search of webpage.

        Returns None when neither source yields a token.
        """
        ytcfg = self._extract_ytcfg(item_id, webpage)
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        return self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)

    @staticmethod
    def _extract_account_syncid(data):
        """
        Extract syncId required to download private playlists of secondary channels
        @param data Either response or ytcfg
        """
        sync_ids = (try_get(
            data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                   lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
        if len(sync_ids) >= 2 and sync_ids[1]:
            # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
            # and just "user_syncid||" for primary channel. We only want the channel_syncid
            return sync_ids[0]
        # ytcfg includes channel_syncid if on secondary channel.
        # Routed through try_get so that data=None yields None instead of
        # raising AttributeError, matching the tolerant lookup above.
        return try_get(data, lambda x: x.get('DELEGATED_SESSION_ID'))

    def _extract_ytcfg(self, video_id, webpage):
        """Return the first ytcfg.set({...}) object found in webpage, parsed; {} if absent or unparsable."""
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}

    def __extract_client_version(self, ytcfg):
        """Return ytcfg['INNERTUBE_CLIENT_VERSION'] if it is a string, else the built-in default version."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or self._YT_WEB_CLIENT_VERSION

    def _extract_context(self, ytcfg=None):
        """Return the client context dict to send with API requests.

        ytcfg['INNERTUBE_CONTEXT'] is returned as-is when it is a non-empty
        dict. Otherwise a minimal context is built from the client name
        (default 'WEB'), the client version and, when available, the
        visitor data found in ytcfg.
        """
        context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict)
        if context:
            return context

        # Recreate the client context (required)
        client_version = self.__extract_client_version(ytcfg)
        client_name = try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str) or 'WEB'
        context = {
            'client': {
                'clientName': client_name,
                'clientVersion': client_version,
            }
        }
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
        return context

    def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None, visitor_data=None):
        """Build the HTTP headers dict for API requests.

        The client name and version headers are always present; the identity
        token, account sync id and visitor data headers are added only for
        truthy arguments. Authorization and X-Origin are added when a
        SAPISIDHASH value can be generated.
        """
        headers = {
            'X-YouTube-Client-Name': '1',
            'X-YouTube-Client-Version': self.__extract_client_version(ytcfg),
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
            headers['X-Goog-AuthUser'] = 0
        if visitor_data:
            headers['x-goog-visitor-id'] = visitor_data
        auth = self._generate_sapisidhash_header()
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = 'https://www.youtube.com'
        return headers

    def _extract_video(self, renderer):
        """Convert a video renderer dict into a 'url'-type result for YoutubeIE.

        Fields missing from renderer come out as None.
        """
        video_id = renderer.get('videoId')
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Strip all whitespace, then take the leading run of digits and commas
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer,
            (lambda x: x['ownerText']['runs'][0]['text'],
             lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
431
0c148415 432
360e1ca5 433class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 434 IE_DESC = 'YouTube.com'
bc2ca1bb 435 _INVIDIOUS_SITES = (
436 # invidious-redirect websites
437 r'(?:www\.)?redirect\.invidious\.io',
438 r'(?:(?:www|dev)\.)?invidio\.us',
439 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
440 r'(?:www\.)?invidious\.pussthecat\.org',
441 r'(?:www\.)?invidious\.048596\.xyz',
442 r'(?:www\.)?invidious\.zee\.li',
443 r'(?:www\.)?vid\.puffyan\.us',
444 r'(?:(?:www|au)\.)?ytprivate\.com',
445 r'(?:www\.)?invidious\.namazso\.eu',
446 r'(?:www\.)?invidious\.ethibox\.fr',
447 r'(?:www\.)?inv\.skyn3t\.in',
448 r'(?:www\.)?invidious\.himiko\.cloud',
449 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
450 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
451 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
452 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
453 # youtube-dl invidious instances list
454 r'(?:(?:www|no)\.)?invidiou\.sh',
455 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
456 r'(?:www\.)?invidious\.kabi\.tk',
457 r'(?:www\.)?invidious\.13ad\.de',
458 r'(?:www\.)?invidious\.mastodon\.host',
459 r'(?:www\.)?invidious\.zapashcanon\.fr',
460 r'(?:www\.)?invidious\.kavin\.rocks',
461 r'(?:www\.)?invidious\.tube',
462 r'(?:www\.)?invidiou\.site',
463 r'(?:www\.)?invidious\.site',
464 r'(?:www\.)?invidious\.xyz',
465 r'(?:www\.)?invidious\.nixnet\.xyz',
466 r'(?:www\.)?invidious\.drycat\.fr',
467 r'(?:www\.)?tube\.poal\.co',
468 r'(?:www\.)?tube\.connect\.cafe',
469 r'(?:www\.)?vid\.wxzm\.sx',
470 r'(?:www\.)?vid\.mint\.lgbt',
471 r'(?:www\.)?yewtu\.be',
472 r'(?:www\.)?yt\.elukerio\.org',
473 r'(?:www\.)?yt\.lelux\.fi',
474 r'(?:www\.)?invidious\.ggc-project\.de',
475 r'(?:www\.)?yt\.maisputain\.ovh',
476 r'(?:www\.)?invidious\.toot\.koeln',
477 r'(?:www\.)?invidious\.fdn\.fr',
478 r'(?:www\.)?watch\.nettohikari\.com',
479 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
480 r'(?:www\.)?qklhadlycap4cnod\.onion',
481 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
482 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
483 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
484 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
485 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
486 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
487 )
cb7dfeea 488 _VALID_URL = r"""(?x)^
c5e8d7af 489 (
edb53e2d 490 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 491 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
492 (?:www\.)?deturl\.com/www\.youtube\.com|
493 (?:www\.)?pwnyoutube\.com|
494 (?:www\.)?hooktube\.com|
495 (?:www\.)?yourepeat\.com|
496 tube\.majestyc\.net|
497 %(invidious)s|
498 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
499 (?:.*?\#/)? # handle anchor (#/) redirect urls
500 (?: # the various things that can precede the ID:
ac7553d0 501 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 502 |(?: # or the v= param in all its forms
f7000f3a 503 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 504 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 505 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
506 v=
507 )
f4b05232 508 ))
cbaed4bb
S
509 |(?:
510 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
511 vid\.plus| # or vid.plus/xxxx
512 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 513 %(invidious)s
cbaed4bb 514 )/
edb53e2d 515 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 516 )
c5e8d7af 517 )? # all until now is optional -> you can pass the naked ID
8bdd16b4 518 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
519 (?!.*?\blist=
520 (?:
521 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
522 WL # WL are handled by the watch later IE
523 )
524 )
c5e8d7af 525 (?(1).+)? # if we found the ID, everything can follow
bc2ca1bb 526 $""" % {
527 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
528 'invidious': '|'.join(_INVIDIOUS_SITES),
529 }
e40c758c 530 _PLAYER_INFO_RE = (
cc2db878 531 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
532 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 533 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 534 )
2c62dc26 535 _formats = {
c2d3cb4c 536 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
537 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
538 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
539 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
540 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
541 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
542 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
543 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 544 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 545 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
546 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
547 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
548 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
549 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
550 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 551 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 552 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
553 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 554
555
556 # 3D videos
c2d3cb4c 557 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
558 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
559 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
560 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 561 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
562 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
563 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 564
96fb5605 565 # Apple HTTP Live Streaming
11f12195 566 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 567 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
568 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
569 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
570 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
571 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 572 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
573 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
574
575 # DASH mp4 video
d23028a8
S
576 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
577 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
578 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
579 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
580 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 581 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
582 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
583 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
584 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
585 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
586 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
587 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 588
f6f1fc92 589 # Dash mp4 audio
d23028a8
S
590 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
591 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
592 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
593 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
594 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
595 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
596 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
597
598 # Dash webm
d23028a8
S
599 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
600 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
601 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
602 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
603 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
604 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
605 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
606 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
607 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
608 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
609 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
610 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
611 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
612 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
613 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 614 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
615 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
616 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
617 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
618 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
619 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
620 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
621
622 # Dash webm audio
d23028a8
S
623 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
624 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 625
0857baad 626 # Dash webm audio with opus inside
d23028a8
S
627 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
628 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
629 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 630
ce6b9a2d
PH
631 # RTMP (unnamed)
632 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
633
634 # av01 video only formats sometimes served with "unknown" codecs
635 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
636 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
637 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
638 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 639 }
29f7c58a 640 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 641
fd5c4aab
S
642 _GEO_BYPASS = False
643
78caa52a 644 IE_NAME = 'youtube'
2eb88d95
PH
645 _TESTS = [
646 {
2d3d2997 647 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
648 'info_dict': {
649 'id': 'BaW_jenozKc',
650 'ext': 'mp4',
3867038a 651 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
652 'uploader': 'Philipp Hagemeister',
653 'uploader_id': 'phihag',
ec85ded8 654 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
655 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
656 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 657 'upload_date': '20121002',
3867038a 658 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 659 'categories': ['Science & Technology'],
3867038a 660 'tags': ['youtube-dl'],
556dbe7f 661 'duration': 10,
dbdaaa23 662 'view_count': int,
3e7c1224
PH
663 'like_count': int,
664 'dislike_count': int,
7c80519c 665 'start_time': 1,
297a564b 666 'end_time': 9,
2eb88d95 667 }
0e853ca4 668 },
fccd3771 669 {
4bc3a23e
PH
670 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
671 'note': 'Embed-only video (#1746)',
672 'info_dict': {
673 'id': 'yZIXLfi8CZQ',
674 'ext': 'mp4',
675 'upload_date': '20120608',
676 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
677 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
678 'uploader': 'SET India',
94bfcd23 679 'uploader_id': 'setindia',
ec85ded8 680 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 681 'age_limit': 18,
545cc85d 682 },
683 'skip': 'Private video',
fccd3771 684 },
11b56058 685 {
8bdd16b4 686 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
687 'note': 'Use the first video ID in the URL',
688 'info_dict': {
689 'id': 'BaW_jenozKc',
690 'ext': 'mp4',
3867038a 691 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
692 'uploader': 'Philipp Hagemeister',
693 'uploader_id': 'phihag',
ec85ded8 694 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 695 'upload_date': '20121002',
3867038a 696 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 697 'categories': ['Science & Technology'],
3867038a 698 'tags': ['youtube-dl'],
556dbe7f 699 'duration': 10,
dbdaaa23 700 'view_count': int,
11b56058
PM
701 'like_count': int,
702 'dislike_count': int,
34a7de29
S
703 },
704 'params': {
705 'skip_download': True,
706 },
11b56058 707 },
dd27fd17 708 {
2d3d2997 709 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
710 'note': '256k DASH audio (format 141) via DASH manifest',
711 'info_dict': {
712 'id': 'a9LDPn-MO4I',
713 'ext': 'm4a',
714 'upload_date': '20121002',
715 'uploader_id': '8KVIDEO',
ec85ded8 716 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
717 'description': '',
718 'uploader': '8KVIDEO',
719 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 720 },
4bc3a23e
PH
721 'params': {
722 'youtube_include_dash_manifest': True,
723 'format': '141',
4919603f 724 },
de3c7fe0 725 'skip': 'format 141 not served anymore',
dd27fd17 726 },
8bdd16b4 727 # DASH manifest with encrypted signature
728 {
729 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
730 'info_dict': {
731 'id': 'IB3lcPjvWLA',
732 'ext': 'm4a',
733 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
734 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
735 'duration': 244,
736 'uploader': 'AfrojackVEVO',
737 'uploader_id': 'AfrojackVEVO',
738 'upload_date': '20131011',
cc2db878 739 'abr': 129.495,
8bdd16b4 740 },
741 'params': {
742 'youtube_include_dash_manifest': True,
743 'format': '141/bestaudio[ext=m4a]',
744 },
745 },
aa79ac0c
PH
746 # Controversy video
747 {
748 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
749 'info_dict': {
750 'id': 'T4XJQO3qol8',
751 'ext': 'mp4',
556dbe7f 752 'duration': 219,
aa79ac0c 753 'upload_date': '20100909',
4fe54c12 754 'uploader': 'Amazing Atheist',
aa79ac0c 755 'uploader_id': 'TheAmazingAtheist',
ec85ded8 756 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 757 'title': 'Burning Everyone\'s Koran',
545cc85d 758 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 759 }
c522adb1 760 },
dd2d55f1 761 # Normal age-gate video (embed allowed)
c522adb1 762 {
2d3d2997 763 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
764 'info_dict': {
765 'id': 'HtVdAasjOgU',
766 'ext': 'mp4',
767 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 768 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 769 'duration': 142,
c522adb1
JMF
770 'uploader': 'The Witcher',
771 'uploader_id': 'WitcherGame',
ec85ded8 772 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 773 'upload_date': '20140605',
34952f09 774 'age_limit': 18,
c522adb1
JMF
775 },
776 },
8bdd16b4 777 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
778 # YouTube Red ad is not captured for creator
779 {
780 'url': '__2ABJjxzNo',
781 'info_dict': {
782 'id': '__2ABJjxzNo',
783 'ext': 'mp4',
784 'duration': 266,
785 'upload_date': '20100430',
786 'uploader_id': 'deadmau5',
787 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 788 'creator': 'deadmau5',
789 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 790 'uploader': 'deadmau5',
791 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 792 'alt_title': 'Some Chords',
8bdd16b4 793 },
794 'expected_warnings': [
795 'DASH manifest missing',
796 ]
797 },
067aa17e 798 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
799 {
800 'url': 'lqQg6PlCWgI',
801 'info_dict': {
802 'id': 'lqQg6PlCWgI',
803 'ext': 'mp4',
556dbe7f 804 'duration': 6085,
90227264 805 'upload_date': '20150827',
cbe2bd91 806 'uploader_id': 'olympic',
ec85ded8 807 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 808 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 809 'uploader': 'Olympic',
cbe2bd91
PH
810 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
811 },
812 'params': {
813 'skip_download': 'requires avconv',
e52a40ab 814 }
cbe2bd91 815 },
6271f1ca
PH
816 # Non-square pixels
817 {
818 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
819 'info_dict': {
820 'id': '_b-2C3KPAM0',
821 'ext': 'mp4',
822 'stretched_ratio': 16 / 9.,
556dbe7f 823 'duration': 85,
6271f1ca
PH
824 'upload_date': '20110310',
825 'uploader_id': 'AllenMeow',
ec85ded8 826 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 827 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 828 'uploader': '孫ᄋᄅ',
6271f1ca
PH
829 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
830 },
06b491eb
S
831 },
832 # url_encoded_fmt_stream_map is empty string
833 {
834 'url': 'qEJwOuvDf7I',
835 'info_dict': {
836 'id': 'qEJwOuvDf7I',
f57b7835 837 'ext': 'webm',
06b491eb
S
838 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
839 'description': '',
840 'upload_date': '20150404',
841 'uploader_id': 'spbelect',
842 'uploader': 'Наблюдатели Петербурга',
843 },
844 'params': {
845 'skip_download': 'requires avconv',
e323cf3f
S
846 },
847 'skip': 'This live event has ended.',
06b491eb 848 },
067aa17e 849 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
850 {
851 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
852 'info_dict': {
853 'id': 'FIl7x6_3R5Y',
eb6793ba 854 'ext': 'webm',
da77d856
S
855 'title': 'md5:7b81415841e02ecd4313668cde88737a',
856 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 857 'duration': 220,
da77d856
S
858 'upload_date': '20150625',
859 'uploader_id': 'dorappi2000',
ec85ded8 860 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 861 'uploader': 'dorappi2000',
eb6793ba 862 'formats': 'mincount:31',
da77d856 863 },
eb6793ba 864 'skip': 'not actual anymore',
2ee8f5d8 865 },
8a1a26ce
YCH
866 # DASH manifest with segment_list
867 {
868 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
869 'md5': '8ce563a1d667b599d21064e982ab9e31',
870 'info_dict': {
871 'id': 'CsmdDsKjzN8',
872 'ext': 'mp4',
17ee98e1 873 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
874 'uploader': 'Airtek',
875 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
876 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
877 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
878 },
879 'params': {
880 'youtube_include_dash_manifest': True,
881 'format': '135', # bestvideo
be49068d
S
882 },
883 'skip': 'This live event has ended.',
2ee8f5d8 884 },
cf7e015f
S
885 {
886 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 887 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 888 'info_dict': {
545cc85d 889 'id': 'jvGDaLqkpTg',
890 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
891 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
892 },
893 'playlist': [{
894 'info_dict': {
545cc85d 895 'id': 'jvGDaLqkpTg',
cf7e015f 896 'ext': 'mp4',
545cc85d 897 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
898 'description': 'md5:e03b909557865076822aa169218d6a5d',
899 'duration': 10643,
900 'upload_date': '20161111',
901 'uploader': 'Team PGP',
902 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
903 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
904 },
905 }, {
906 'info_dict': {
545cc85d 907 'id': '3AKt1R1aDnw',
cf7e015f 908 'ext': 'mp4',
545cc85d 909 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
910 'description': 'md5:e03b909557865076822aa169218d6a5d',
911 'duration': 10991,
912 'upload_date': '20161111',
913 'uploader': 'Team PGP',
914 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
915 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
916 },
917 }, {
918 'info_dict': {
545cc85d 919 'id': 'RtAMM00gpVc',
cf7e015f 920 'ext': 'mp4',
545cc85d 921 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
922 'description': 'md5:e03b909557865076822aa169218d6a5d',
923 'duration': 10995,
924 'upload_date': '20161111',
925 'uploader': 'Team PGP',
926 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
927 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
928 },
929 }, {
930 'info_dict': {
545cc85d 931 'id': '6N2fdlP3C5U',
cf7e015f 932 'ext': 'mp4',
545cc85d 933 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
934 'description': 'md5:e03b909557865076822aa169218d6a5d',
935 'duration': 10990,
936 'upload_date': '20161111',
937 'uploader': 'Team PGP',
938 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
939 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
940 },
941 }],
942 'params': {
943 'skip_download': True,
944 },
cbaed4bb 945 },
f9f49d87 946 {
067aa17e 947 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
948 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
949 'info_dict': {
950 'id': 'gVfLd0zydlo',
951 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
952 },
953 'playlist_count': 2,
be49068d 954 'skip': 'Not multifeed anymore',
f9f49d87 955 },
cbaed4bb 956 {
2d3d2997 957 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 958 'only_matching': True,
0e49d9a6 959 },
6d4fc66b 960 {
2d3d2997 961 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
962 'only_matching': True,
963 },
0e49d9a6 964 {
067aa17e 965 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 966 # Also tests cut-off URL expansion in video description (see
067aa17e
S
967 # https://github.com/ytdl-org/youtube-dl/issues/1892,
968 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
969 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
970 'info_dict': {
971 'id': 'lsguqyKfVQg',
972 'ext': 'mp4',
973 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 974 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 975 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 976 'duration': 133,
0e49d9a6
LL
977 'upload_date': '20151119',
978 'uploader_id': 'IronSoulElf',
ec85ded8 979 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 980 'uploader': 'IronSoulElf',
eb6793ba
S
981 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
982 'track': 'Dark Walk - Position Music',
983 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 984 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
985 },
986 'params': {
987 'skip_download': True,
988 },
989 },
61f92af1 990 {
067aa17e 991 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
992 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
993 'only_matching': True,
994 },
313dfc45
LL
995 {
996 # Video with yt:stretch=17:0
997 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
998 'info_dict': {
999 'id': 'Q39EVAstoRM',
1000 'ext': 'mp4',
1001 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1002 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1003 'upload_date': '20151107',
1004 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1005 'uploader': 'CH GAMER DROID',
1006 },
1007 'params': {
1008 'skip_download': True,
1009 },
be49068d 1010 'skip': 'This video does not exist.',
313dfc45 1011 },
7caf9830
S
1012 {
1013 # Video licensed under Creative Commons
1014 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1015 'info_dict': {
1016 'id': 'M4gD1WSo5mA',
1017 'ext': 'mp4',
1018 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1019 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1020 'duration': 721,
7caf9830
S
1021 'upload_date': '20150127',
1022 'uploader_id': 'BerkmanCenter',
ec85ded8 1023 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1024 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1025 'license': 'Creative Commons Attribution license (reuse allowed)',
1026 },
1027 'params': {
1028 'skip_download': True,
1029 },
1030 },
fd050249
S
1031 {
1032 # Channel-like uploader_url
1033 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1034 'info_dict': {
1035 'id': 'eQcmzGIKrzg',
1036 'ext': 'mp4',
1037 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1038 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1039 'duration': 4060,
fd050249 1040 'upload_date': '20151119',
eb6793ba 1041 'uploader': 'Bernie Sanders',
fd050249 1042 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1043 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1044 'license': 'Creative Commons Attribution license (reuse allowed)',
1045 },
1046 'params': {
1047 'skip_download': True,
1048 },
1049 },
040ac686
S
1050 {
1051 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1052 'only_matching': True,
7f29cf54
S
1053 },
1054 {
067aa17e 1055 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1056 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1057 'only_matching': True,
6496ccb4
S
1058 },
1059 {
1060 # Rental video preview
1061 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1062 'info_dict': {
1063 'id': 'uGpuVWrhIzE',
1064 'ext': 'mp4',
1065 'title': 'Piku - Trailer',
1066 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1067 'upload_date': '20150811',
1068 'uploader': 'FlixMatrix',
1069 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1070 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1071 'license': 'Standard YouTube License',
1072 },
1073 'params': {
1074 'skip_download': True,
1075 },
eb6793ba 1076 'skip': 'This video is not available.',
022a5d66 1077 },
12afdc2a
S
1078 {
1079 # YouTube Red video with episode data
1080 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1081 'info_dict': {
1082 'id': 'iqKdEhx-dD4',
1083 'ext': 'mp4',
1084 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1085 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1086 'duration': 2085,
12afdc2a
S
1087 'upload_date': '20170118',
1088 'uploader': 'Vsauce',
1089 'uploader_id': 'Vsauce',
1090 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1091 'series': 'Mind Field',
1092 'season_number': 1,
1093 'episode_number': 1,
1094 },
1095 'params': {
1096 'skip_download': True,
1097 },
1098 'expected_warnings': [
1099 'Skipping DASH manifest',
1100 ],
1101 },
c7121fa7
S
1102 {
1103 # The following content has been identified by the YouTube community
1104 # as inappropriate or offensive to some audiences.
1105 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1106 'info_dict': {
1107 'id': '6SJNVb0GnPI',
1108 'ext': 'mp4',
1109 'title': 'Race Differences in Intelligence',
1110 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1111 'duration': 965,
1112 'upload_date': '20140124',
1113 'uploader': 'New Century Foundation',
1114 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1115 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1116 },
1117 'params': {
1118 'skip_download': True,
1119 },
545cc85d 1120 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1121 },
022a5d66
S
1122 {
1123 # itag 212
1124 'url': '1t24XAntNCY',
1125 'only_matching': True,
fd5c4aab
S
1126 },
1127 {
1128 # geo restricted to JP
1129 'url': 'sJL6WA-aGkQ',
1130 'only_matching': True,
1131 },
cd5a74a2
S
1132 {
1133 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1134 'only_matching': True,
1135 },
bc2ca1bb 1136 {
1137 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1138 'only_matching': True,
1139 },
1140 {
1141 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1142 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1143 'only_matching': True,
1144 },
825cd268
RA
1145 {
1146 # DRM protected
1147 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1148 'only_matching': True,
4fe54c12
S
1149 },
1150 {
1151 # Video with unsupported adaptive stream type formats
1152 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1153 'info_dict': {
1154 'id': 'Z4Vy8R84T1U',
1155 'ext': 'mp4',
1156 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1157 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1158 'duration': 433,
1159 'upload_date': '20130923',
1160 'uploader': 'Amelia Putri Harwita',
1161 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1162 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1163 'formats': 'maxcount:10',
1164 },
1165 'params': {
1166 'skip_download': True,
1167 'youtube_include_dash_manifest': False,
1168 },
5429d6a9 1169 'skip': 'not actual anymore',
5caabd3c 1170 },
1171 {
822b9d9c 1172 # Youtube Music Auto-generated description
5caabd3c 1173 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1174 'info_dict': {
1175 'id': 'MgNrAu2pzNs',
1176 'ext': 'mp4',
1177 'title': 'Voyeur Girl',
1178 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1179 'upload_date': '20190312',
5429d6a9
S
1180 'uploader': 'Stephen - Topic',
1181 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1182 'artist': 'Stephen',
1183 'track': 'Voyeur Girl',
1184 'album': 'it\'s too much love to know my dear',
1185 'release_date': '20190313',
1186 'release_year': 2019,
1187 },
1188 'params': {
1189 'skip_download': True,
1190 },
1191 },
66b48727
RA
1192 {
1193 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1194 'only_matching': True,
1195 },
011e75e6
S
1196 {
1197 # invalid -> valid video id redirection
1198 'url': 'DJztXj2GPfl',
1199 'info_dict': {
1200 'id': 'DJztXj2GPfk',
1201 'ext': 'mp4',
1202 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1203 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1204 'upload_date': '20090125',
1205 'uploader': 'Prochorowka',
1206 'uploader_id': 'Prochorowka',
1207 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1208 'artist': 'Panjabi MC',
1209 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1210 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1211 },
1212 'params': {
1213 'skip_download': True,
1214 },
545cc85d 1215 'skip': 'Video unavailable',
ea74e00b
DP
1216 },
1217 {
1218 # empty description results in an empty string
1219 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1220 'info_dict': {
1221 'id': 'x41yOUIvK2k',
1222 'ext': 'mp4',
1223 'title': 'IMG 3456',
1224 'description': '',
1225 'upload_date': '20170613',
1226 'uploader_id': 'ElevageOrVert',
1227 'uploader': 'ElevageOrVert',
1228 },
1229 'params': {
1230 'skip_download': True,
1231 },
1232 },
a0566bbf 1233 {
29f7c58a 1234 # with '};' inside yt initial data (see [1])
1235 # see [2] for an example with '};' inside ytInitialPlayerResponse
1236 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1237 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1238 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1239 'info_dict': {
1240 'id': 'CHqg6qOn4no',
1241 'ext': 'mp4',
1242 'title': 'Part 77 Sort a list of simple types in c#',
1243 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1244 'upload_date': '20130831',
1245 'uploader_id': 'kudvenkat',
1246 'uploader': 'kudvenkat',
1247 },
1248 'params': {
1249 'skip_download': True,
1250 },
1251 },
29f7c58a 1252 {
1253 # another example of '};' in ytInitialData
1254 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1255 'only_matching': True,
1256 },
1257 {
1258 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1259 'only_matching': True,
1260 },
545cc85d 1261 {
cc2db878 1262 # https://github.com/ytdl-org/youtube-dl/pull/28094
1263 'url': 'OtqTfy26tG0',
1264 'info_dict': {
1265 'id': 'OtqTfy26tG0',
1266 'ext': 'mp4',
1267 'title': 'Burn Out',
1268 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1269 'upload_date': '20141120',
1270 'uploader': 'The Cinematic Orchestra - Topic',
1271 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1272 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1273 'artist': 'The Cinematic Orchestra',
1274 'track': 'Burn Out',
1275 'album': 'Every Day',
1276 'release_data': None,
1277 'release_year': None,
1278 },
1279 'params': {
1280 'skip_download': True,
1281 },
545cc85d 1282 },
bc2ca1bb 1283 {
1284 # controversial video, only works with bpctr when authenticated with cookies
1285 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1286 'only_matching': True,
1287 },
f7ad7160 1288 {
1289 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1290 'url': 'cBvYw8_A0vQ',
1291 'info_dict': {
1292 'id': 'cBvYw8_A0vQ',
1293 'ext': 'mp4',
1294 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1295 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1296 'upload_date': '20201120',
1297 'uploader': 'Walk around Japan',
1298 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1299 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1300 },
1301 'params': {
1302 'skip_download': True,
1303 },
1304 },
2eb88d95
PH
1305 ]
1306
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor and create its empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions, keyed in _decrypt_signature by
    # (player URL, signature cache id)
    self._player_cache = {}
    # Downloaded player code, keyed in _extract_signature_function by player id
    self._code_cache = {}
e0df6211 1311
60064c53
PH
1312 def _signature_cache_id(self, example_sig):
1313 """ Return a string representation of a signature """
78caa52a 1314 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
60064c53 1315
e40c758c
S
1316 @classmethod
1317 def _extract_player_info(cls, player_url):
1318 for player_re in cls._PLAYER_INFO_RE:
1319 id_m = re.search(player_re, player_url)
1320 if id_m:
1321 break
1322 else:
c081b35c 1323 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1324 return id_m.group('id')
e40c758c
S
1325
1326 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 1327 player_id = self._extract_player_info(player_url)
e0df6211 1328
c4417ddb 1329 # Read from filesystem cache
545cc85d 1330 func_id = 'js_%s_%s' % (
1331 player_id, self._signature_cache_id(example_sig))
c4417ddb 1332 assert os.path.basename(func_id) == func_id
a0e07d31 1333
69ea8ca4 1334 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1335 if cache_spec is not None:
78caa52a 1336 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1337
545cc85d 1338 if player_id not in self._code_cache:
1339 self._code_cache[player_id] = self._download_webpage(
e0df6211 1340 player_url, video_id,
545cc85d 1341 note='Downloading player ' + player_id,
69ea8ca4 1342 errnote='Download of %s failed' % player_url)
545cc85d 1343 code = self._code_cache[player_id]
1344 res = self._parse_sig_js(code)
e0df6211 1345
785521bf
PH
1346 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1347 cache_res = res(test_string)
1348 cache_spec = [ord(c) for c in cache_res]
83799698 1349
69ea8ca4 1350 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
83799698
PH
1351 return res
1352
60064c53 1353 def _print_sig_code(self, func, example_sig):
edf3e38e
PH
1354 def gen_sig_code(idxs):
1355 def _genslice(start, end, step):
78caa52a 1356 starts = '' if start == 0 else str(start)
8bcc8756 1357 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
69ea8ca4 1358 steps = '' if step == 1 else (':%d' % step)
78caa52a 1359 return 's[%s%s%s]' % (starts, ends, steps)
edf3e38e
PH
1360
1361 step = None
7af808a5
PH
1362 # Quelch pyflakes warnings - start will be set when step is set
1363 start = '(Never used)'
edf3e38e
PH
1364 for i, prev in zip(idxs[1:], idxs[:-1]):
1365 if step is not None:
1366 if i - prev == step:
1367 continue
1368 yield _genslice(start, prev, step)
1369 step = None
1370 continue
1371 if i - prev in [-1, 1]:
1372 step = i - prev
1373 start = prev
1374 continue
1375 else:
78caa52a 1376 yield 's[%d]' % prev
edf3e38e 1377 if step is None:
78caa52a 1378 yield 's[%d]' % i
edf3e38e
PH
1379 else:
1380 yield _genslice(start, i, step)
1381
78caa52a 1382 test_string = ''.join(map(compat_chr, range(len(example_sig))))
c705320f 1383 cache_res = func(test_string)
edf3e38e 1384 cache_spec = [ord(c) for c in cache_res]
78caa52a 1385 expr_code = ' + '.join(gen_sig_code(cache_spec))
60064c53
PH
1386 signature_id_tuple = '(%s)' % (
1387 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
69ea8ca4 1388 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
78caa52a 1389 ' return %s\n') % (signature_id_tuple, expr_code)
69ea8ca4 1390 self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1391
e0df6211
PH
1392 def _parse_sig_js(self, jscode):
1393 funcname = self._search_regex(
abefc03f
S
1394 (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1395 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
cc2db878 1396 r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
1397 r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
1398 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
e450f6cb 1399 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
31ce6e99 1400 r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
abefc03f
S
1401 # Obsolete patterns
1402 r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
9a47fa35 1403 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
abefc03f
S
1404 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1405 r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1406 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1407 r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1408 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1409 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
3c90cc8b 1410 jscode, 'Initial JS player signature function name', group='sig')
2b25cb5d
PH
1411
1412 jsi = JSInterpreter(jscode)
1413 initial_function = jsi.extract_function(funcname)
e0df6211
PH
1414 return lambda s: initial_function([s])
1415
545cc85d 1416 def _decrypt_signature(self, s, video_id, player_url):
257a2501 1417 """Turn the encrypted s field into a working signature"""
6b37f0be 1418
c8bf86d5 1419 if player_url is None:
69ea8ca4 1420 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1421
69ea8ca4 1422 if player_url.startswith('//'):
78caa52a 1423 player_url = 'https:' + player_url
3c90cc8b
S
1424 elif not re.match(r'https?://', player_url):
1425 player_url = compat_urlparse.urljoin(
1426 'https://www.youtube.com', player_url)
c8bf86d5 1427 try:
62af3a0e 1428 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1429 if player_id not in self._player_cache:
1430 func = self._extract_signature_function(
60064c53 1431 video_id, player_url, s
c8bf86d5
PH
1432 )
1433 self._player_cache[player_id] = func
1434 func = self._player_cache[player_id]
1435 if self._downloader.params.get('youtube_print_sig_code'):
60064c53 1436 self._print_sig_code(func, s)
c8bf86d5
PH
1437 return func(s)
1438 except Exception as e:
1439 tb = traceback.format_exc()
1440 raise ExtractorError(
78caa52a 1441 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1442
def _mark_watched(self, video_id, player_response):
    """Request the playback tracking URL so that the video counts as watched.

    The URL is read from
    player_response['playbackTracking']['videostatsPlaybackUrl']['baseUrl'];
    nothing happens when it is missing or not a valid URL. The 'ver' and
    'cpn' query parameters are set (replacing any existing values) before
    the request is made. The request is non-fatal.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # random.randint includes both of its bounds, so drawing from 0..256
    # and masking with 63 would favour the first character; choose
    # directly from the alphabet to keep all 64 characters equally likely.
    cpn = ''.join(random.choice(CPN_ALPHABET) for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1467
66c9fa36
S
1468 @staticmethod
1469 def _extract_urls(webpage):
1470 # Embedded YouTube player
1471 entries = [
1472 unescapeHTML(mobj.group('url'))
1473 for mobj in re.finditer(r'''(?x)
1474 (?:
1475 <iframe[^>]+?src=|
1476 data-video-url=|
1477 <embed[^>]+?src=|
1478 embedSWF\(?:\s*|
1479 <object[^>]+data=|
1480 new\s+SWFObject\(
1481 )
1482 (["\'])
1483 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
f2332f18 1484 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
66c9fa36
S
1485 \1''', webpage)]
1486
1487 # lazyYT YouTube embed
1488 entries.extend(list(map(
1489 unescapeHTML,
1490 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1491
1492 # Wordpress "YouTube Video Importer" plugin
1493 matches = re.findall(r'''(?x)<div[^>]+
1494 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1495 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1496 entries.extend(m[-1] for m in matches)
1497
1498 return entries
1499
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by YoutubeIE._extract_urls, or None if there is none."""
    for first_url in YoutubeIE._extract_urls(webpage):
        return first_url
    return None
1504
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id, i.e. group 2 of _VALID_URL matched against url.

    _VALID_URL is applied in verbose mode and anchored at the start of url.
    Raises ExtractorError when url does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1512
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a list of chapter dicts from the chaptered player bar in data.

    Each chapter starts at its own 'timeRangeStartMillis' (passed through
    float_or_none with scale=1000) and ends where the following chapter
    starts; the last chapter ends at duration. Chapters whose start or end
    cannot be determined are left out. Each dict has the keys
    'start_time', 'end_time' and 'title'.

    Returns None when data holds no non-empty chapter list. video_id is
    not used here.
    """
    def start_of(chapter):
        millis = try_get(
            chapter,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    chapters_list = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not chapters_list:
        return

    total = len(chapters_list)
    chapters = []
    for index, chapter in enumerate(chapters_list):
        start_time = start_of(chapter)
        if start_time is None:
            continue
        following = index + 1
        if following < total:
            end_time = start_of(chapters_list[following])
        else:
            end_time = duration
        if end_time is None:
            continue
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': try_get(
                chapter,
                lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return chapters
1552
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Search *webpage* for a JSON blob matched by *regex* and parse it.

    *regex* followed by ``self._YT_INITIAL_BOUNDARY_RE`` is tried first,
    then *regex* alone. If neither matches, '{}' is parsed instead.
    Parsing is non-fatal.
    """
    bounded_regex = r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE)
    raw_json = self._search_regex(
        (bounded_regex, regex), webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 1557
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    The first two space-separated tokens are turned into 'now-<X><units>'
    and handed to datetime_from_str with precision='auto'.
    Returns None when time_text is empty/None or has fewer than three
    space-separated tokens.
    """
    # Callers may pass None when no time text could be extracted
    if not time_text:
        return None
    time_text_split = time_text.split(' ')
    if len(time_text_split) < 3:
        return None
    return datetime_from_str(
        'now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1567
a1c5d2ca
M
1568 @staticmethod
1569 def _join_text_entries(runs):
1570 text = None
1571 for run in runs:
1572 if not isinstance(run, dict):
1573 continue
1574 sub_text = try_get(run, lambda x: x['text'], compat_str)
1575 if sub_text:
1576 if not text:
1577 text = sub_text
1578 continue
1579 text += sub_text
1580 return text
1581
1582 def _extract_comment(self, comment_renderer, parent=None):
1583 comment_id = comment_renderer.get('commentId')
1584 if not comment_id:
1585 return
1586 comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
1587 text = self._join_text_entries(comment_text_runs) or ''
1588 comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
1589 time_text = self._join_text_entries(comment_time_text)
d92f5d5a 1590 timestamp = calendar.timegm(self.parse_time_text(time_text).timetuple())
a1c5d2ca
M
1591 author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
1592 author_id = try_get(comment_renderer,
1593 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
1594 votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
1595 lambda x: x['likeCount']), compat_str)) or 0
1596 author_thumbnail = try_get(comment_renderer,
1597 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
1598
1599 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
1600 is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
a1c5d2ca
M
1601 return {
1602 'id': comment_id,
1603 'text': text,
d92f5d5a 1604 'timestamp': timestamp,
a1c5d2ca
M
1605 'time_text': time_text,
1606 'like_count': votes,
1607 'is_favorited': is_liked,
1608 'author': author,
1609 'author_id': author_id,
1610 'author_thumbnail': author_thumbnail,
1611 'author_is_uploader': author_is_uploader,
1612 'parent': parent or 'root'
1613 }
1614
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, session_token_list, parent=None, comment_counts=None):
    """Generate the comments reachable from *root_continuation_data*.

    Yields comment dicts (see _extract_comment). For the top-level call
    (*parent* is None) the estimated total number of comments is yielded
    once as an int before any comment, if it could be determined. Replies
    are fetched by calling this method recursively with *parent* set to
    the id of the comment being replied to.

    *session_token_list* is a one-element list; its item is replaced
    whenever a response carries a new 'xsrf_token'.
    *comment_counts* is a three-element list shared across the recursion:
    [comments so far, estimated total, current reply thread number].

    Raises ExtractorError when a page cannot be fetched after the
    configured number of retries or when the server reports an error.
    """

    def extract_thread(parent_renderer):
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = try_get(
                comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                content, (lambda x: x['commentRenderer'], dict))

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    # TODO: Generalize the download code with TabIE
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        retries = self._downloader.params.get('extractor_retries', 3)
        count = -1
        last_error = None

        # Every pass either breaks with a usable response, retries, returns or raises
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = 'Downloading initial comment continuation page'
                    else:
                        note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = '%sDownloading comment%s page %d %s' % (
                        ' ' if parent else '',
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning('Assumed end of comments (received HTTP Error 413)')
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + ' (this API is probably deprecated)'
                    if count < retries:
                        continue
                raise
            else:
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response'])) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if browse.get('reload'):
                    raise ExtractorError('Invalid or missing params in continuation request', expected=False)

                # TODO: not tested, merged from old extractor
                err_msg = browse.get('externalErrorMessage')
                if err_msg:
                    raise ExtractorError('YouTube said: %s' % err_msg, expected=False)

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    raise ExtractorError(last_error)

        if not response:
            break
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    # NOTE(review): str_to_int presumably returns None for text it
                    # cannot convert - confirm. A non-int must neither reach the
                    # '%d' formats nor be yielded as if it were the total.
                    estimated_total = str_to_int(expected_comment_count)
                    if estimated_total is not None:
                        comment_counts[1] = estimated_total
                        self.to_screen('Downloading ~%d comments' % estimated_total)
                        yield estimated_total

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
                    # Restart from the sorted listing instead of consuming this page
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
1799
def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
    """Entry for comment extraction

    Walks *contents*, feeds each 'itemSectionRenderer' found to
    _comment_entries and gathers everything it yields. Ints coming out of
    that generator are taken as the estimated total (used only for the
    progress message); everything else is stored as a comment.

    Returns a dict with 'comments' (list) and 'comment_count' (its length).
    """
    known_entry_comment_renderers = (
        'itemSectionRenderer',
    )
    collected = []
    estimated_total = 0
    for entry in contents:
        for key, renderer in entry.items():
            if key in known_entry_comment_renderers:
                comment_iter = self._comment_entries(
                    renderer,
                    identity_token=self._extract_identity_token(webpage, item_id=video_id),
                    account_syncid=self._extract_account_syncid(ytcfg),
                    ytcfg=ytcfg,
                    session_token_list=[xsrf_token])

                for item in comment_iter:
                    if isinstance(item, int):
                        estimated_total = item
                    else:
                        collected.append(item)
                # Only the first known renderer of an entry is processed
                break
    downloaded = len(collected)
    self.to_screen('Downloaded %d/%d comments' % (downloaded, estimated_total))
    return {
        'comments': collected,
        'comment_count': downloaded,
    }
1830
c5e8d7af 1831 def _real_extract(self, url):
cf7e015f 1832 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1833 video_id = self._match_id(url)
1834 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1835 webpage_url = base_url + 'watch?v=' + video_id
1836 webpage = self._download_webpage(
cce889b9 1837 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 1838
1839 player_response = None
1840 if webpage:
1841 player_response = self._extract_yt_initial_variable(
1842 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1843 video_id, 'initial player response')
f4f751af 1844
1845 ytcfg = self._extract_ytcfg(video_id, webpage)
545cc85d 1846 if not player_response:
1847 player_response = self._call_api(
f4f751af 1848 'player', {'videoId': video_id}, video_id, api_key=self._extract_api_key(ytcfg))
545cc85d 1849
1850 playability_status = player_response.get('playabilityStatus') or {}
1851 if playability_status.get('reason') == 'Sign in to confirm your age':
1852 pr = self._parse_json(try_get(compat_parse_qs(
1853 self._download_webpage(
1854 base_url + 'get_video_info', video_id,
1855 'Refetching age-gated info webpage',
1856 'unable to download video info webpage', query={
1857 'video_id': video_id,
7c60c33e 1858 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
545cc85d 1859 }, fatal=False)),
1860 lambda x: x['player_response'][0],
1861 compat_str) or '{}', video_id)
1862 if pr:
1863 player_response = pr
1864
1865 trailer_video_id = try_get(
1866 playability_status,
1867 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1868 compat_str)
1869 if trailer_video_id:
1870 return self.url_result(
1871 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1872
545cc85d 1873 def get_text(x):
1874 if not x:
c2d125d9 1875 return
f7ad7160 1876 text = x.get('simpleText')
1877 if text and isinstance(text, compat_str):
1878 return text
1879 runs = x.get('runs')
1880 if not isinstance(runs, list):
1881 return
1882 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
15be3eb5 1883
545cc85d 1884 search_meta = (
1885 lambda x: self._html_search_meta(x, webpage, default=None)) \
1886 if webpage else lambda x: None
dbdaaa23 1887
545cc85d 1888 video_details = player_response.get('videoDetails') or {}
37357d21 1889 microformat = try_get(
545cc85d 1890 player_response,
1891 lambda x: x['microformat']['playerMicroformatRenderer'],
1892 dict) or {}
1893 video_title = video_details.get('title') \
1894 or get_text(microformat.get('title')) \
1895 or search_meta(['og:title', 'twitter:title', 'title'])
1896 video_description = video_details.get('shortDescription')
cf7e015f 1897
8fe10494 1898 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1899 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1900 multifeed_metadata_list = try_get(
1901 player_response,
1902 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1903 compat_str)
8fe10494
S
1904 if multifeed_metadata_list:
1905 entries = []
1906 feed_ids = []
1907 for feed in multifeed_metadata_list.split(','):
1908 # Unquote should take place before split on comma (,) since textual
1909 # fields may contain comma as well (see
067aa17e 1910 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1911 feed_data = compat_parse_qs(
1912 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1913
1914 def feed_entry(name):
545cc85d 1915 return try_get(
1916 feed_data, lambda x: x[name][0], compat_str)
6b09401b
S
1917
1918 feed_id = feed_entry('id')
1919 if not feed_id:
1920 continue
1921 feed_title = feed_entry('title')
1922 title = video_title
1923 if feed_title:
1924 title += ' (%s)' % feed_title
8fe10494
S
1925 entries.append({
1926 '_type': 'url_transparent',
1927 'ie_key': 'Youtube',
1928 'url': smuggle_url(
545cc85d 1929 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 1930 {'force_singlefeed': True}),
6b09401b 1931 'title': title,
8fe10494 1932 })
6b09401b 1933 feed_ids.append(feed_id)
8fe10494
S
1934 self.to_screen(
1935 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1936 % (', '.join(feed_ids), video_id))
545cc85d 1937 return self.playlist_result(
1938 entries, video_id, video_title, video_description)
8fe10494
S
1939 else:
1940 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1941
545cc85d 1942 formats = []
1943 itags = []
cc2db878 1944 itag_qualities = {}
545cc85d 1945 player_url = None
dca3ff4a 1946 q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
545cc85d 1947 streaming_data = player_response.get('streamingData') or {}
1948 streaming_formats = streaming_data.get('formats') or []
1949 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
1950 for fmt in streaming_formats:
1951 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
1952 continue
321bf820 1953
cc2db878 1954 itag = str_or_none(fmt.get('itag'))
1955 quality = fmt.get('quality')
1956 if itag and quality:
1957 itag_qualities[itag] = quality
1958 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
1959 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
1960 # number of fragments that would subsequently be requested (with `&sq=N`)
1961 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
1962 continue
1963
545cc85d 1964 fmt_url = fmt.get('url')
1965 if not fmt_url:
1966 sc = compat_parse_qs(fmt.get('signatureCipher'))
1967 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
1968 encrypted_sig = try_get(sc, lambda x: x['s'][0])
1969 if not (sc and fmt_url and encrypted_sig):
1970 continue
1971 if not player_url:
1972 if not webpage:
1973 continue
1974 player_url = self._search_regex(
1975 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1976 webpage, 'player URL', fatal=False)
1977 if not player_url:
201e9eaa 1978 continue
545cc85d 1979 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
1980 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
1981 fmt_url += '&' + sp + '=' + signature
1982
545cc85d 1983 if itag:
1984 itags.append(itag)
cc2db878 1985 tbr = float_or_none(
1986 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 1987 dct = {
1988 'asr': int_or_none(fmt.get('audioSampleRate')),
1989 'filesize': int_or_none(fmt.get('contentLength')),
1990 'format_id': itag,
1991 'format_note': fmt.get('qualityLabel') or quality,
1992 'fps': int_or_none(fmt.get('fps')),
1993 'height': int_or_none(fmt.get('height')),
dca3ff4a 1994 'quality': q(quality),
cc2db878 1995 'tbr': tbr,
545cc85d 1996 'url': fmt_url,
1997 'width': fmt.get('width'),
1998 }
1999 mimetype = fmt.get('mimeType')
2000 if mimetype:
2001 mobj = re.match(
2002 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
2003 if mobj:
2004 dct['ext'] = mimetype2ext(mobj.group(1))
2005 dct.update(parse_codecs(mobj.group(2)))
cc2db878 2006 no_audio = dct.get('acodec') == 'none'
2007 no_video = dct.get('vcodec') == 'none'
2008 if no_audio:
2009 dct['vbr'] = tbr
2010 if no_video:
2011 dct['abr'] = tbr
2012 if no_audio or no_video:
545cc85d 2013 dct['downloader_options'] = {
2014 # Youtube throttles chunks >~10M
2015 'http_chunk_size': 10485760,
bf1317d2 2016 }
7c60c33e 2017 if dct.get('ext'):
2018 dct['container'] = dct['ext'] + '_dash'
545cc85d 2019 formats.append(dct)
2020
2021 hls_manifest_url = streaming_data.get('hlsManifestUrl')
2022 if hls_manifest_url:
2023 for f in self._extract_m3u8_formats(
2024 hls_manifest_url, video_id, 'mp4', fatal=False):
2025 itag = self._search_regex(
2026 r'/itag/(\d+)', f['url'], 'itag', default=None)
2027 if itag:
2028 f['format_id'] = itag
2029 formats.append(f)
2030
1418a043 2031 if self._downloader.params.get('youtube_include_dash_manifest', True):
545cc85d 2032 dash_manifest_url = streaming_data.get('dashManifestUrl')
2033 if dash_manifest_url:
545cc85d 2034 for f in self._extract_mpd_formats(
2035 dash_manifest_url, video_id, fatal=False):
cc2db878 2036 itag = f['format_id']
2037 if itag in itags:
2038 continue
dca3ff4a 2039 if itag in itag_qualities:
2040 # Not actually useful since the sorting is already done with "quality,res,fps,codec"
2041 # but kept to maintain feature parity (and code similarity) with youtube-dl
2042 # Remove if this causes any issues with sorting in future
2043 f['quality'] = q(itag_qualities[itag])
545cc85d 2044 filesize = int_or_none(self._search_regex(
2045 r'/clen/(\d+)', f.get('fragment_base_url')
2046 or f['url'], 'file size', default=None))
2047 if filesize:
2048 f['filesize'] = filesize
cc2db878 2049 formats.append(f)
bf1317d2 2050
545cc85d 2051 if not formats:
63ad4d43 2052 if not self._downloader.params.get('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2053 self.raise_no_formats(
545cc85d 2054 'This video is DRM protected.', expected=True)
2055 pemr = try_get(
2056 playability_status,
2057 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2058 dict) or {}
2059 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2060 subreason = pemr.get('subreason')
2061 if subreason:
2062 subreason = clean_html(get_text(subreason))
2063 if subreason == 'The uploader has not made this video available in your country.':
2064 countries = microformat.get('availableCountries')
2065 if not countries:
2066 regions_allowed = search_meta('regionsAllowed')
2067 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2068 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2069 reason += '\n' + subreason
2070 if reason:
b7da73eb 2071 self.raise_no_formats(reason, expected=True)
bf1317d2 2072
545cc85d 2073 self._sort_formats(formats)
bf1317d2 2074
545cc85d 2075 keywords = video_details.get('keywords') or []
2076 if not keywords and webpage:
2077 keywords = [
2078 unescapeHTML(m.group('content'))
2079 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2080 for keyword in keywords:
2081 if keyword.startswith('yt:stretch='):
46fff710 2082 stretch_ratio = map(
2083 lambda x: int_or_none(x, default=0),
2084 keyword.split('=')[1].split(':'))
2085 w, h = (list(stretch_ratio) + [0])[:2]
545cc85d 2086 if w > 0 and h > 0:
2087 ratio = w / h
2088 for f in formats:
2089 if f.get('vcodec') != 'none':
2090 f['stretched_ratio'] = ratio
6449cd80 2091
545cc85d 2092 thumbnails = []
2093 for container in (video_details, microformat):
2094 for thumbnail in (try_get(
2095 container,
2096 lambda x: x['thumbnail']['thumbnails'], list) or []):
2097 thumbnail_url = thumbnail.get('url')
2098 if not thumbnail_url:
bf1317d2 2099 continue
1988fab7 2100 # Sometimes youtube gives a wrong thumbnail URL. See:
2101 # https://github.com/yt-dlp/yt-dlp/issues/233
2102 # https://github.com/ytdl-org/youtube-dl/issues/28023
2103 if 'maxresdefault' in thumbnail_url:
2104 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2105 thumbnails.append({
2106 'height': int_or_none(thumbnail.get('height')),
2107 'url': thumbnail_url,
2108 'width': int_or_none(thumbnail.get('width')),
2109 })
2110 if thumbnails:
2111 break
a6211d23 2112 else:
545cc85d 2113 thumbnail = search_meta(['og:image', 'twitter:image'])
2114 if thumbnail:
2115 thumbnails = [{'url': thumbnail}]
2116
2117 category = microformat.get('category') or search_meta('genre')
2118 channel_id = video_details.get('channelId') \
2119 or microformat.get('externalChannelId') \
2120 or search_meta('channelId')
2121 duration = int_or_none(
2122 video_details.get('lengthSeconds')
2123 or microformat.get('lengthSeconds')) \
2124 or parse_duration(search_meta('duration'))
2125 is_live = video_details.get('isLive')
2126 owner_profile_url = microformat.get('ownerProfileUrl')
2127
2128 info = {
2129 'id': video_id,
2130 'title': self._live_title(video_title) if is_live else video_title,
2131 'formats': formats,
2132 'thumbnails': thumbnails,
2133 'description': video_description,
2134 'upload_date': unified_strdate(
2135 microformat.get('uploadDate')
2136 or search_meta('uploadDate')),
2137 'uploader': video_details['author'],
2138 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2139 'uploader_url': owner_profile_url,
2140 'channel_id': channel_id,
2141 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2142 'duration': duration,
2143 'view_count': int_or_none(
2144 video_details.get('viewCount')
2145 or microformat.get('viewCount')
2146 or search_meta('interactionCount')),
2147 'average_rating': float_or_none(video_details.get('averageRating')),
2148 'age_limit': 18 if (
2149 microformat.get('isFamilySafe') is False
2150 or search_meta('isFamilyFriendly') == 'false'
2151 or search_meta('og:restrictions:age') == '18+') else 0,
2152 'webpage_url': webpage_url,
2153 'categories': [category] if category else None,
2154 'tags': keywords,
2155 'is_live': is_live,
2156 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2157 'was_live': video_details.get('isLiveContent'),
545cc85d 2158 }
b477fc13 2159
545cc85d 2160 pctr = try_get(
2161 player_response,
2162 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2163 subtitles = {}
2164 if pctr:
2165 def process_language(container, base_url, lang_code, query):
2166 lang_subs = []
2167 for fmt in self._SUBTITLE_FORMATS:
2168 query.update({
2169 'fmt': fmt,
2170 })
2171 lang_subs.append({
2172 'ext': fmt,
2173 'url': update_url_query(base_url, query),
2174 })
2175 container[lang_code] = lang_subs
7e72694b 2176
545cc85d 2177 for caption_track in (pctr.get('captionTracks') or []):
2178 base_url = caption_track.get('baseUrl')
2179 if not base_url:
2180 continue
2181 if caption_track.get('kind') != 'asr':
2182 lang_code = caption_track.get('languageCode')
2183 if not lang_code:
2184 continue
2185 process_language(
2186 subtitles, base_url, lang_code, {})
2187 continue
2188 automatic_captions = {}
2189 for translation_language in (pctr.get('translationLanguages') or []):
2190 translation_language_code = translation_language.get('languageCode')
2191 if not translation_language_code:
2192 continue
2193 process_language(
2194 automatic_captions, base_url, translation_language_code,
2195 {'tlang': translation_language_code})
2196 info['automatic_captions'] = automatic_captions
2197 info['subtitles'] = subtitles
7e72694b 2198
545cc85d 2199 parsed_url = compat_urllib_parse_urlparse(url)
2200 for component in [parsed_url.fragment, parsed_url.query]:
2201 query = compat_parse_qs(component)
2202 for k, v in query.items():
2203 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2204 d_k += '_time'
2205 if d_k not in info and k in s_ks:
2206 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2207
2208 # Youtube Music Auto-generated description
822b9d9c 2209 if video_description:
38d70284 2210 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2211 if mobj:
822b9d9c
RA
2212 release_year = mobj.group('release_year')
2213 release_date = mobj.group('release_date')
2214 if release_date:
2215 release_date = release_date.replace('-', '')
2216 if not release_year:
545cc85d 2217 release_year = release_date[:4]
2218 info.update({
2219 'album': mobj.group('album'.strip()),
2220 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2221 'track': mobj.group('track').strip(),
2222 'release_date': release_date,
cc2db878 2223 'release_year': int_or_none(release_year),
545cc85d 2224 })
7e72694b 2225
545cc85d 2226 initial_data = None
2227 if webpage:
2228 initial_data = self._extract_yt_initial_variable(
2229 webpage, self._YT_INITIAL_DATA_RE, video_id,
2230 'yt initial data')
2231 if not initial_data:
2232 initial_data = self._call_api(
f4f751af 2233 'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
545cc85d 2234
2235 if not is_live:
2236 try:
2237 # This will error if there is no livechat
2238 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2239 info['subtitles']['live_chat'] = [{
394dcd44 2240 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
545cc85d 2241 'video_id': video_id,
2242 'ext': 'json',
2243 'protocol': 'youtube_live_chat_replay',
2244 }]
2245 except (KeyError, IndexError, TypeError):
2246 pass
2247
2248 if initial_data:
2249 chapters = self._extract_chapters_from_json(
2250 initial_data, video_id, duration)
2251 if not chapters:
2252 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2253 contents = try_get(
2254 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2255 list)
2256 if not contents:
2257 continue
2258
2259 def chapter_time(mmlir):
2260 return parse_duration(
2261 get_text(mmlir.get('timeDescription')))
2262
2263 chapters = []
2264 for next_num, content in enumerate(contents, start=1):
2265 mmlir = content.get('macroMarkersListItemRenderer') or {}
2266 start_time = chapter_time(mmlir)
2267 end_time = chapter_time(try_get(
2268 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2269 if next_num < len(contents) else duration
2270 if start_time is None or end_time is None:
2271 continue
2272 chapters.append({
2273 'start_time': start_time,
2274 'end_time': end_time,
2275 'title': get_text(mmlir.get('title')),
2276 })
2277 if chapters:
2278 break
2279 if chapters:
2280 info['chapters'] = chapters
2281
2282 contents = try_get(
2283 initial_data,
2284 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2285 list) or []
2286 for content in contents:
2287 vpir = content.get('videoPrimaryInfoRenderer')
2288 if vpir:
2289 stl = vpir.get('superTitleLink')
2290 if stl:
2291 stl = get_text(stl)
2292 if try_get(
2293 vpir,
2294 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2295 info['location'] = stl
2296 else:
2297 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2298 if mobj:
2299 info.update({
2300 'series': mobj.group(1),
2301 'season_number': int(mobj.group(2)),
2302 'episode_number': int(mobj.group(3)),
2303 })
2304 for tlb in (try_get(
2305 vpir,
2306 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2307 list) or []):
2308 tbr = tlb.get('toggleButtonRenderer') or {}
2309 for getter, regex in [(
2310 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2311 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2312 lambda x: x['accessibility'],
2313 lambda x: x['accessibilityData']['accessibilityData'],
2314 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2315 label = (try_get(tbr, getter, dict) or {}).get('label')
2316 if label:
2317 mobj = re.match(regex, label)
2318 if mobj:
2319 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2320 break
2321 sbr_tooltip = try_get(
2322 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2323 if sbr_tooltip:
2324 like_count, dislike_count = sbr_tooltip.split(' / ')
2325 info.update({
2326 'like_count': str_to_int(like_count),
2327 'dislike_count': str_to_int(dislike_count),
2328 })
2329 vsir = content.get('videoSecondaryInfoRenderer')
2330 if vsir:
2331 info['channel'] = get_text(try_get(
2332 vsir,
2333 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2334 dict))
545cc85d 2335 rows = try_get(
2336 vsir,
2337 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2338 list) or []
2339 multiple_songs = False
2340 for row in rows:
2341 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2342 multiple_songs = True
2343 break
2344 for row in rows:
2345 mrr = row.get('metadataRowRenderer') or {}
2346 mrr_title = mrr.get('title')
2347 if not mrr_title:
2348 continue
2349 mrr_title = get_text(mrr['title'])
2350 mrr_contents_text = get_text(mrr['contents'][0])
2351 if mrr_title == 'License':
2352 info['license'] = mrr_contents_text
2353 elif not multiple_songs:
2354 if mrr_title == 'Album':
2355 info['album'] = mrr_contents_text
2356 elif mrr_title == 'Artist':
2357 info['artist'] = mrr_contents_text
2358 elif mrr_title == 'Song':
2359 info['track'] = mrr_contents_text
2360
2361 fallbacks = {
2362 'channel': 'uploader',
2363 'channel_id': 'uploader_id',
2364 'channel_url': 'uploader_url',
2365 }
2366 for to, frm in fallbacks.items():
2367 if not info.get(to):
2368 info[to] = info.get(frm)
2369
2370 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2371 v = info.get(s_k)
2372 if v:
2373 info[d_k] = v
b84071c0 2374
c224251a
M
2375 is_private = bool_or_none(video_details.get('isPrivate'))
2376 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2377 is_membersonly = None
b28f8d24 2378 is_premium = None
c224251a
M
2379 if initial_data and is_private is not None:
2380 is_membersonly = False
b28f8d24 2381 is_premium = False
c224251a
M
2382 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2383 for content in contents or []:
2384 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2385 for badge in badges or []:
2386 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2387 if label.lower() == 'members only':
2388 is_membersonly = True
2389 break
b28f8d24
M
2390 elif label.lower() == 'premium':
2391 is_premium = True
2392 break
2393 if is_membersonly or is_premium:
c224251a
M
2394 break
2395
2396 # TODO: Add this for playlists
2397 info['availability'] = self._availability(
2398 is_private=is_private,
b28f8d24 2399 needs_premium=is_premium,
c224251a
M
2400 needs_subscription=is_membersonly,
2401 needs_auth=info['age_limit'] >= 18,
2402 is_unlisted=None if is_private is None else is_unlisted)
2403
06167fbb 2404 # get xsrf for annotations or comments
2405 get_annotations = self._downloader.params.get('writeannotations', False)
2406 get_comments = self._downloader.params.get('getcomments', False)
2407 if get_annotations or get_comments:
29f7c58a 2408 xsrf_token = None
545cc85d 2409 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2410 if ytcfg:
2411 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2412 if not xsrf_token:
2413 xsrf_token = self._search_regex(
2414 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2415 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2416
2417 # annotations
06167fbb 2418 if get_annotations:
64b6a4e9
RA
2419 invideo_url = try_get(
2420 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2421 if xsrf_token and invideo_url:
29f7c58a 2422 xsrf_field_name = None
2423 if ytcfg:
2424 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2425 if not xsrf_field_name:
2426 xsrf_field_name = self._search_regex(
2427 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2428 webpage, 'xsrf field name',
29f7c58a 2429 group='xsrf_field_name', default='session_token')
8a784c74 2430 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2431 self._proto_relative_url(invideo_url),
2432 video_id, note='Downloading annotations',
2433 errnote='Unable to download video annotations', fatal=False,
2434 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2435
277d6ff5 2436 if get_comments:
a1c5d2ca 2437 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)
4ea3be0a 2438
545cc85d 2439 self.mark_watched(video_id, player_response)
d77ab8e2 2440
545cc85d 2441 return info
c5e8d7af 2442
5f6a1245 2443
8bdd16b4 2444class YoutubeTabIE(YoutubeBaseInfoExtractor):
2445 IE_DESC = 'YouTube.com tab'
70d5c17b 2446 _VALID_URL = r'''(?x)
2447 https?://
2448 (?:\w+\.)?
2449 (?:
2450 youtube(?:kids)?\.com|
2451 invidio\.us
2452 )/
2453 (?:
2454 (?:channel|c|user)/|
2455 (?P<not_channel>
9ba5705a 2456 feed/|hashtag/|
70d5c17b 2457 (?:playlist|watch)\?.*?\blist=
2458 )|
29f7c58a 2459 (?!(?:%s)\b) # Direct URLs
70d5c17b 2460 )
2461 (?P<id>[^/?\#&]+)
2462 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2463 IE_NAME = 'youtube:tab'
2464
81127aa5 2465 _TESTS = [{
8bdd16b4 2466 # playlists, multipage
2467 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2468 'playlist_mincount': 94,
2469 'info_dict': {
2470 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2471 'title': 'Игорь Клейнер - Playlists',
2472 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2473 'uploader': 'Игорь Клейнер',
2474 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2475 },
2476 }, {
2477 # playlists, multipage, different order
2478 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2479 'playlist_mincount': 94,
2480 'info_dict': {
2481 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2482 'title': 'Игорь Клейнер - Playlists',
2483 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2484 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2485 'uploader': 'Игорь Клейнер',
8bdd16b4 2486 },
2487 }, {
2488 # playlists, singlepage
2489 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2490 'playlist_mincount': 4,
2491 'info_dict': {
2492 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2493 'title': 'ThirstForScience - Playlists',
2494 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2495 'uploader': 'ThirstForScience',
2496 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2497 }
2498 }, {
2499 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2500 'only_matching': True,
2501 }, {
2502 # basic, single video playlist
0e30a7b9 2503 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2504 'info_dict': {
0e30a7b9 2505 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2506 'uploader': 'Sergey M.',
2507 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2508 'title': 'youtube-dl public playlist',
81127aa5 2509 },
0e30a7b9 2510 'playlist_count': 1,
9291475f 2511 }, {
8bdd16b4 2512 # empty playlist
0e30a7b9 2513 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2514 'info_dict': {
0e30a7b9 2515 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2516 'uploader': 'Sergey M.',
2517 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2518 'title': 'youtube-dl empty playlist',
9291475f
PH
2519 },
2520 'playlist_count': 0,
2521 }, {
8bdd16b4 2522 # Home tab
2523 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2524 'info_dict': {
8bdd16b4 2525 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2526 'title': 'lex will - Home',
2527 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2528 'uploader': 'lex will',
2529 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2530 },
8bdd16b4 2531 'playlist_mincount': 2,
9291475f 2532 }, {
8bdd16b4 2533 # Videos tab
2534 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2535 'info_dict': {
8bdd16b4 2536 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2537 'title': 'lex will - Videos',
2538 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2539 'uploader': 'lex will',
2540 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2541 },
8bdd16b4 2542 'playlist_mincount': 975,
9291475f 2543 }, {
8bdd16b4 2544 # Videos tab, sorted by popular
2545 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2546 'info_dict': {
8bdd16b4 2547 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2548 'title': 'lex will - Videos',
2549 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2550 'uploader': 'lex will',
2551 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2552 },
8bdd16b4 2553 'playlist_mincount': 199,
9291475f 2554 }, {
8bdd16b4 2555 # Playlists tab
2556 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2557 'info_dict': {
8bdd16b4 2558 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2559 'title': 'lex will - Playlists',
2560 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2561 'uploader': 'lex will',
2562 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2563 },
8bdd16b4 2564 'playlist_mincount': 17,
ac7553d0 2565 }, {
8bdd16b4 2566 # Community tab
2567 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2568 'info_dict': {
8bdd16b4 2569 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2570 'title': 'lex will - Community',
2571 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2572 'uploader': 'lex will',
2573 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2574 },
2575 'playlist_mincount': 18,
87dadd45 2576 }, {
8bdd16b4 2577 # Channels tab
2578 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2579 'info_dict': {
8bdd16b4 2580 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2581 'title': 'lex will - Channels',
2582 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2583 'uploader': 'lex will',
2584 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2585 },
deaec5af 2586 'playlist_mincount': 12,
6b08cdf6 2587 }, {
a0566bbf 2588 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2589 'only_matching': True,
2590 }, {
a0566bbf 2591 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2592 'only_matching': True,
2593 }, {
a0566bbf 2594 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2595 'only_matching': True,
2596 }, {
2597 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2598 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2599 'info_dict': {
2600 'title': '29C3: Not my department',
2601 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2602 'uploader': 'Christiaan008',
2603 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2604 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2605 },
2606 'playlist_count': 96,
2607 }, {
2608 'note': 'Large playlist',
2609 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2610 'info_dict': {
8bdd16b4 2611 'title': 'Uploads from Cauchemar',
2612 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2613 'uploader': 'Cauchemar',
2614 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2615 },
8bdd16b4 2616 'playlist_mincount': 1123,
2617 }, {
2618 # even larger playlist, 8832 videos
2619 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2620 'only_matching': True,
4b7df0d3
JMF
2621 }, {
2622 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2623 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2624 'info_dict': {
acf757f4
PH
2625 'title': 'Uploads from Interstellar Movie',
2626 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2627 'uploader': 'Interstellar Movie',
8bdd16b4 2628 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2629 },
481cc733 2630 'playlist_mincount': 21,
358de58c 2631 }, {
2632 'note': 'Playlist with "show unavailable videos" button',
2633 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
2634 'info_dict': {
2635 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
2636 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
2637 'uploader': 'Phim Siêu Nhân Nhật Bản',
2638 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
2639 },
2640 'playlist_mincount': 1400,
2641 'expected_warnings': [
2642 'YouTube said: INFO - Unavailable videos are hidden',
2643 ]
5d342002 2644 }, {
2645 'note': 'Playlist with unavailable videos in a later page',
2646 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
2647 'info_dict': {
2648 'title': 'Uploads from BlankTV',
2649 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
2650 'uploader': 'BlankTV',
2651 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
2652 },
2653 'playlist_mincount': 20000,
8bdd16b4 2654 }, {
2655 # https://github.com/ytdl-org/youtube-dl/issues/21844
2656 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2657 'info_dict': {
2658 'title': 'Data Analysis with Dr Mike Pound',
2659 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2660 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2661 'uploader': 'Computerphile',
deaec5af 2662 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2663 },
2664 'playlist_mincount': 11,
2665 }, {
a0566bbf 2666 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2667 'only_matching': True,
dacb3a86
S
2668 }, {
2669 # Playlist URL that does not actually serve a playlist
2670 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2671 'info_dict': {
2672 'id': 'FqZTN594JQw',
2673 'ext': 'webm',
2674 'title': "Smiley's People 01 detective, Adventure Series, Action",
2675 'uploader': 'STREEM',
2676 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2677 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2678 'upload_date': '20150526',
2679 'license': 'Standard YouTube License',
2680 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2681 'categories': ['People & Blogs'],
2682 'tags': list,
dbdaaa23 2683 'view_count': int,
dacb3a86
S
2684 'like_count': int,
2685 'dislike_count': int,
2686 },
2687 'params': {
2688 'skip_download': True,
2689 },
13a75688 2690 'skip': 'This video is not available.',
dacb3a86 2691 'add_ie': [YoutubeIE.ie_key()],
481cc733 2692 }, {
8bdd16b4 2693 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2694 'only_matching': True,
66b48727 2695 }, {
8bdd16b4 2696 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2697 'only_matching': True,
a0566bbf 2698 }, {
2699 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2700 'info_dict': {
2701 'id': '9Auq9mYxFEE',
2702 'ext': 'mp4',
deaec5af 2703 'title': compat_str,
a0566bbf 2704 'uploader': 'Sky News',
2705 'uploader_id': 'skynews',
2706 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2707 'upload_date': '20191102',
deaec5af 2708 'description': 'md5:85ddd75d888674631aaf9599a9a0b0ae',
a0566bbf 2709 'categories': ['News & Politics'],
2710 'tags': list,
2711 'like_count': int,
2712 'dislike_count': int,
2713 },
2714 'params': {
2715 'skip_download': True,
2716 },
2717 }, {
2718 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2719 'info_dict': {
2720 'id': 'a48o2S1cPoo',
2721 'ext': 'mp4',
2722 'title': 'The Young Turks - Live Main Show',
2723 'uploader': 'The Young Turks',
2724 'uploader_id': 'TheYoungTurks',
2725 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2726 'upload_date': '20150715',
2727 'license': 'Standard YouTube License',
2728 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2729 'categories': ['News & Politics'],
2730 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2731 'like_count': int,
2732 'dislike_count': int,
2733 },
2734 'params': {
2735 'skip_download': True,
2736 },
2737 'only_matching': True,
2738 }, {
2739 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2740 'only_matching': True,
2741 }, {
2742 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2743 'only_matching': True,
3d3dddc9 2744 }, {
2745 'url': 'https://www.youtube.com/feed/trending',
2746 'only_matching': True,
2747 }, {
2748 # needs auth
2749 'url': 'https://www.youtube.com/feed/library',
2750 'only_matching': True,
2751 }, {
2752 # needs auth
2753 'url': 'https://www.youtube.com/feed/history',
2754 'only_matching': True,
2755 }, {
2756 # needs auth
2757 'url': 'https://www.youtube.com/feed/subscriptions',
2758 'only_matching': True,
2759 }, {
2760 # needs auth
2761 'url': 'https://www.youtube.com/feed/watch_later',
2762 'only_matching': True,
2763 }, {
2764 # no longer available?
2765 'url': 'https://www.youtube.com/feed/recommended',
2766 'only_matching': True,
29f7c58a 2767 }, {
2768 # inline playlist with not always working continuations
2769 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2770 'only_matching': True,
2771 }, {
2772 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2773 'only_matching': True,
2774 }, {
2775 'url': 'https://www.youtube.com/course',
2776 'only_matching': True,
2777 }, {
2778 'url': 'https://www.youtube.com/zsecurity',
2779 'only_matching': True,
2780 }, {
2781 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2782 'only_matching': True,
2783 }, {
2784 'url': 'https://www.youtube.com/TheYoungTurks/live',
2785 'only_matching': True,
39ed931e 2786 }, {
2787 'url': 'https://www.youtube.com/hashtag/cctv9',
2788 'info_dict': {
2789 'id': 'cctv9',
2790 'title': '#cctv9',
2791 },
2792 'playlist_mincount': 350,
29f7c58a 2793 }]
2794
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL that YoutubeIE accepts is rejected here; everything else is
    decided by the inherited ``suitable`` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2799
2800 def _extract_channel_id(self, webpage):
2801 channel_id = self._html_search_meta(
2802 'channelId', webpage, 'channel id', default=None)
2803 if channel_id:
2804 return channel_id
2805 channel_url = self._html_search_meta(
2806 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2807 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2808 'twitter:app:url:googleplay'), webpage, 'channel url')
2809 return self._search_regex(
2810 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2811 channel_url, 'channel id')
15f6397c 2812
8bdd16b4 2813 @staticmethod
cd7c66cf 2814 def _extract_basic_item_renderer(item):
2815 # Modified from _extract_grid_item_renderer
2816 known_renderers = (
e3c07697 2817 'playlistRenderer', 'videoRenderer', 'channelRenderer',
cd7c66cf 2818 'gridPlaylistRenderer', 'gridVideoRenderer', 'gridChannelRenderer'
2819 )
2820 for key, renderer in item.items():
2821 if key not in known_renderers:
2822 continue
2823 return renderer
8bdd16b4 2824
8bdd16b4 2825 def _grid_entries(self, grid_renderer):
2826 for item in grid_renderer['items']:
2827 if not isinstance(item, dict):
39b62db1 2828 continue
cd7c66cf 2829 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 2830 if not isinstance(renderer, dict):
2831 continue
2832 title = try_get(
2833 renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
2834 # playlist
2835 playlist_id = renderer.get('playlistId')
2836 if playlist_id:
2837 yield self.url_result(
2838 'https://www.youtube.com/playlist?list=%s' % playlist_id,
2839 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
2840 video_title=title)
2841 # video
2842 video_id = renderer.get('videoId')
2843 if video_id:
2844 yield self._extract_video(renderer)
2845 # channel
2846 channel_id = renderer.get('channelId')
2847 if channel_id:
2848 title = try_get(
2849 renderer, lambda x: x['title']['simpleText'], compat_str)
2850 yield self.url_result(
2851 'https://www.youtube.com/channel/%s' % channel_id,
2852 ie=YoutubeTabIE.ie_key(), video_title=title)
2853
3d3dddc9 2854 def _shelf_entries_from_content(self, shelf_renderer):
2855 content = shelf_renderer.get('content')
2856 if not isinstance(content, dict):
8bdd16b4 2857 return
cd7c66cf 2858 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 2859 if renderer:
2860 # TODO: add support for nested playlists so each shelf is processed
2861 # as separate playlist
2862 # TODO: this includes only first N items
2863 for entry in self._grid_entries(renderer):
2864 yield entry
2865 renderer = content.get('horizontalListRenderer')
2866 if renderer:
2867 # TODO
2868 pass
8bdd16b4 2869
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for one shelf.

    When the shelf endpoint provides a URL, a URL result for it is yielded
    first (titled with the shelf's first title run).  If *skip_channels* is
    true and that URL contains ``/channels?``, the whole shelf is dropped.
    Afterwards the entries embedded in the shelf content are yielded too,
    because a shelf is not guaranteed to carry a URL.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Links to other channels are skipped by URL substring; checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType ==
        # WEB_PAGE_TYPE_CHANNEL will not work.
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Fall back to (or complement with) the items found in the shelf content
    for content_entry in self._shelf_entries_from_content(shelf_renderer):
        yield content_entry
c5e8d7af 2887
8bdd16b4 2888 def _playlist_entries(self, video_list_renderer):
2889 for content in video_list_renderer['contents']:
2890 if not isinstance(content, dict):
2891 continue
2892 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2893 if not isinstance(renderer, dict):
2894 continue
2895 video_id = renderer.get('videoId')
2896 if not video_id:
2897 continue
2898 yield self._extract_video(renderer)
07aeced6 2899
def _rich_entries(self, rich_grid_renderer):
    """Yield the video found at ``content.videoRenderer`` of a rich item.

    Yields nothing when that renderer is missing, is not a dict, or has no
    ``videoId``.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
2907
8bdd16b4 2908 def _video_entry(self, video_renderer):
2909 video_id = video_renderer.get('videoId')
2910 if video_id:
2911 return self._extract_video(video_renderer)
dacb3a86 2912
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos referenced by a community (backstage) post.

    Two sources are used: the video attached to the post, and video links
    found in the runs of the post text.  A text link pointing at the same
    video as the attachment is not yielded a second time.

    Nothing is yielded when ``post.backstagePostRenderer`` is missing.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return
    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
    # Remember the attached video's id so inline links to it can be skipped
    # (previously this stayed None, which made the duplicate check a no-op).
    video_id = None
    if video_renderer:
        video_id = video_renderer.get('videoId')
        entry = self._video_entry(video_renderer)
        if entry:
            yield entry
    # inline video links
    runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
    for run in runs:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url:
            continue
        if not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if video_id == ep_video_id:
            continue
        # Report the id of the linked video, not that of the attachment
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 2941
8bdd16b4 2942 def _post_thread_continuation_entries(self, post_thread_continuation):
2943 contents = post_thread_continuation.get('contents')
2944 if not isinstance(contents, list):
2945 return
2946 for content in contents:
2947 renderer = content.get('backstagePostThreadRenderer')
2948 if not isinstance(renderer, dict):
2949 continue
2950 for entry in self._post_thread_entries(renderer):
2951 yield entry
07aeced6 2952
39ed931e 2953 r''' # unused
2954 def _rich_grid_entries(self, contents):
2955 for content in contents:
2956 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
2957 if video_renderer:
2958 entry = self._video_entry(video_renderer)
2959 if entry:
2960 yield entry
2961 '''
2962
29f7c58a 2963 @staticmethod
2964 def _build_continuation_query(continuation, ctp=None):
2965 query = {
2966 'ctoken': continuation,
2967 'continuation': continuation,
2968 }
2969 if ctp:
2970 query['itct'] = ctp
2971 return query
2972
@staticmethod
def _extract_next_continuation_data(renderer):
    """Build a continuation query from ``continuations[0].nextContinuationData``.

    Returns None when that structure is missing or holds no ``continuation``
    token; otherwise the query built by ``_build_continuation_query``.
    """
    continuation_data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict) or {}
    token = continuation_data.get('continuation')
    if not token:
        return None
    return YoutubeTabIE._build_continuation_query(
        token, continuation_data.get('clickTrackingParams'))
c5e8d7af 2984
@classmethod
def _extract_continuation(cls, renderer):
    """Find the continuation query for *renderer*, if it has one.

    ``continuations[0].nextContinuationData`` is tried first.  Otherwise the
    renderer's ``contents`` and ``items`` lists are scanned, in that order,
    for the first ``continuationItemRenderer`` whose endpoint carries a
    ``continuationCommand.token``.  Returns None when nothing is found.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    candidates = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate,
            lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
    return None
448830ce 3007
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield every entry of a tab, following continuations page by page.

    tab             -- tab renderer dict; its 'content' is walked and its
                       'title' decides whether channel shelves are skipped
    item_id         -- only used to label the continuation page requests
    identity_token, account_syncid
                    -- passed through to ``_generate_api_headers``
    ytcfg           -- passed to ``_extract_context``,
                       ``_generate_api_headers`` and ``_extract_response``

    The first page comes from the tab itself.  Each following page is
    requested with the continuation found on the previous one, until no
    continuation is left, a request returns nothing, or the response
    contains no renderer this method knows how to handle.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        # Walks parent_renderer['contents'] and records the continuation it
        # finds in continuation_list[0] as a side channel for the caller.
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                # Only the first known renderer of each item is processed
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list used as a mutable cell shared with extract_entries
    # (Python 2 does not support nonlocal)
    continuation_list = [None]
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    for page_num in itertools.count(1):
        if not continuation:
            break
        query = {'continuation': continuation['continuation']}
        # 'itct' exists only when the continuation came with click-tracking
        # params, so it must not be indexed unconditionally.
        itct = continuation.get('itct')
        if itct:
            query['clickTracking'] = {'clickTrackingParams': itct}
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=query, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Prefer the visitor data of the latest response for the next request
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        # Response layout 1: entries under 'continuationContents'
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Fresh cell: extract_entries reads this rebinding at call time
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Response layout 2: entries appended through a response action;
        # the handler is chosen from the type of the first appended item and
        # the items are wrapped under the key that handler expects.
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Nothing recognisable in this response: stop paging
        break
9558dcec 3127
@staticmethod
def _extract_selected_tab(tabs):
    """Return the ``tabRenderer`` of the first tab flagged as selected.

    Raises ExtractorError when no tab in *tabs* has a boolean true
    ``tabRenderer.selected``.
    """
    for candidate in tabs:
        is_selected = try_get(
            candidate, lambda x: x['tabRenderer']['selected'], bool)
        if is_selected:
            return candidate['tabRenderer']
    raise ExtractorError('Unable to find selected tab')
b82f815f 3135
@staticmethod
def _extract_uploader(data):
    """Collect uploader fields from the playlist sidebar of *data*.

    Looks at every ``playlistSidebarSecondaryInfoRenderer`` item of
    ``sidebar.playlistSidebarRenderer.items`` and reads the first title run
    of its video owner.  Returns a dict that may contain ``uploader``,
    ``uploader_id`` and ``uploader_url``; keys whose value is None are
    left out, so the result can be empty.
    """
    found = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info,
            lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0],
            dict)
        if not owner:
            continue
        browse_id = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        base_path = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
        # A later owner replaces all three fields of an earlier one
        found['uploader'] = owner.get('text')
        found['uploader_id'] = browse_id
        found['uploader_url'] = urljoin('https://www.youtube.com/', base_path)
    return dict((key, value) for key, value in found.items() if value is not None)
8bdd16b4 3158
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a page whose content is organised in tabs.

    Metadata comes from the page's ``channelMetadataRenderer`` when present,
    otherwise from its ``playlistMetadataRenderer``. If no channel id was
    found there, uploader details are filled in from the playlist sidebar.
    The entries are obtained from ``self._entries`` for the selected tab.
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    thumbnail_candidates = tags = []

    selected_tab = self._extract_selected_tab(tabs)

    metadata_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if metadata_renderer:
        channel_name = metadata_renderer.get('title')
        channel_url = metadata_renderer.get('channelUrl')
        channel_id = metadata_renderer.get('externalId')
    else:
        metadata_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if metadata_renderer:
        title = metadata_renderer.get('title')
        description = metadata_renderer.get('description', '')
        playlist_id = channel_id
        tags = metadata_renderer.get('keywords', '').split()
        # Channel avatar first, then the playlist's own thumbnail
        thumbnail_candidates = (
            try_get(metadata_renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for candidate in thumbnail_candidates:
        thumbnail_url = url_or_none(candidate.get('url')) if isinstance(candidate, dict) else None
        if thumbnail_url:
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(candidate.get('width')),
                'height': int_or_none(candidate.get('height')),
            })

    playlist_id = item_id if playlist_id is None else playlist_id
    if title is None:
        hashtag = try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    title += format_field(selected_tab, 'title', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the (possibly just updated) uploader_* fields
    for alias, source_key in (
            ('channel', 'uploader'),
            ('channel_id', 'uploader_id'),
            ('channel_url', 'uploader_url')):
        metadata[alias] = metadata[source_key]

    entries = self._entries(
        selected_tab, playlist_id,
        self._extract_identity_token(webpage, item_id),
        self._extract_account_syncid(data),
        self._extract_ytcfg(item_id, webpage))
    return self.playlist_result(entries, **metadata)
73c4ac2c 3230
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a Mix playlist, requesting further pages as needed.

    @param playlist     playlist renderer dict of the first page
    @param playlist_id  id sent as ``playlistId`` with every follow-up request
    @param data         initial page data, used for the account sync id
    @param webpage      page source, used for ytcfg and the identity token

    Iteration stops when a page has no videos, when a page holds nothing
    after the last video already yielded, or when the very first video
    shows up again.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last video of the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last entry need not be a playlistPanelVideoRenderer with a
        # watchEndpoint; fall back to an empty dict so that the defaults
        # below apply instead of failing on None
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 3269
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Handle a playlist found on a watch page.

    When the playlist advertises its own URL and that URL differs from
    ``url``, extraction is delegated to that URL; otherwise the playlist
    is extracted as a mix.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, relative_url)

    # Only mix playlists are handled here; the rest go to the regular tab-based playlist URL
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3287
f3eaa8dd
M
def _extract_alerts(self, data, expected=False):
    """Report the alerts contained in ``data`` and raise on an error alert.

    Alerts whose type is not 'error' (case-insensitive), and every error
    alert except the last one, are reported as warnings. If there is at
    least one error alert, ExtractorError is raised with the message of
    the last one.

    @param data      response dict that may contain an ``alerts`` list
    @param expected  passed through to ExtractorError
    """

    def _real_extract_alerts():
        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_dict, dict):
                continue
            for alert in alert_dict.values():
                # try_get also copes with an alert that is not a dict
                alert_type = try_get(alert, lambda x: x['type'], compat_str)
                if not alert_type:
                    continue
                # The text is either a simpleText or a list of runs; build
                # the whole message first so each alert is yielded only once
                message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) or ''
                for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                    # A run without a string text contributes nothing
                    message += try_get(run, lambda x: x['text'], compat_str) or ''
                if message:
                    yield alert_type, message

    errors = []
    warnings = []
    for alert_type, alert_message in _real_extract_alerts():
        if alert_type.lower() == 'error':
            errors.append([alert_type, alert_message])
        else:
            warnings.append([alert_type, alert_message])

    for alert_type, alert_message in (warnings + errors[:-1]):
        self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
    if errors:
        raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
02ced43c 3318
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None when ``data`` has no playlist sidebar. Otherwise the
    browse API is queried, using the button's browseId and params when the
    button was found and default values when it was not. The request is
    non-fatal, so the result may also be None if the request fails.
    """
    sidebar_renderer = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    if not sidebar_renderer:
        return
    browse_id = params = None
    for item in sidebar_renderer:
        if not isinstance(item, dict):
            continue
        renderer = item.get('playlistSidebarPrimaryInfoRenderer')
        menu_renderer = try_get(
            renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_renderer:
            if not isinstance(menu_item, dict):
                continue
            nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
            text = try_get(
                nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
            if not text or text.lower() != 'show unavailable videos':
                continue
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = browse_endpoint.get('browseId')
            params = browse_endpoint.get('params')
            break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    # The account sync id is read from the page data, as the other callers
    # of _extract_account_syncid in this class do (not from ytcfg)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=try_get(
            self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False,
        note='Downloading API JSON with unavailable videos')
358de58c 3362
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True):
    """Call the API endpoint ``ep`` and return its response, retrying on failure.

    A request is retried (up to the ``extractor_retries`` param, default 3)
    on HTTP 500/503/404 and when the response contains none of
    ``check_get_keys``. Once retries are exhausted, or on any other
    ExtractorError from the call, the error is raised if ``fatal`` and
    otherwise reported as a warning, in which case None is returned.
    Alerts in a received response are always processed by _extract_alerts.
    """
    retries = self._downloader.params.get('extractor_retries', 3)
    required_keys = [] if check_get_keys is None else check_get_keys
    response = last_error = None
    count = -1
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        retry_suffix = ' (retry #%d)' % count if count else ''
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg),
                api_key=self._extract_api_key(ytcfg),
                note='%s%s' % (note, retry_suffix))
        except ExtractorError as e:
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            retriable = isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404)
            if retriable:
                last_error = 'HTTP Error %s' % e.cause.code
                if count < retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return

        # Youtube may send alerts if there was an issue with the continuation page
        self._extract_alerts(response, expected=False)
        if not required_keys or dict_get(response, required_keys):
            break
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        last_error = 'Incomplete data received'
        if count < retries:
            continue
        if fatal:
            raise ExtractorError(last_error)
        self.report_warning(last_error)
        return
    return response
3410
def _extract_webpage(self, url, item_id):
    """Download ``url`` and return ``(webpage, data)``.

    ``data`` is the initial data extracted from the page. The download is
    repeated (up to the ``extractor_retries`` param, default 3) until the
    data has a non-empty 'contents' or 'currentVideoEndpoint'; after the
    last retry ExtractorError is raised. Alerts found in the data are
    processed on every attempt.
    """
    last_error = 'Incomplete yt initial data recieved'
    retries = self._downloader.params.get('extractor_retries', 3)
    attempt = -1
    while attempt < retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if attempt:
            self.report_warning('%s. Retrying ...' % last_error)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        webpage = self._download_webpage(
            url, item_id, 'Downloading webpage%s' % retry_suffix)
        data = self._extract_yt_initial_data(item_id, webpage)
        self._extract_alerts(data, expected=True)
        if any(data.get(key) for key in ('contents', 'currentVideoEndpoint')):
            break
        if attempt >= retries:
            raise ExtractorError(last_error)
    return webpage, data
3431
def _real_extract(self, url):
    """Extract a channel tab, playlist or watch-page URL.

    The URL is normalised to www.youtube.com first. A bare channel/user
    URL is redirected to its videos tab, and a watch URL without a video
    id to the playlist it names. The downloaded page is then handled as
    a tabbed page, as a watch-page playlist, or as a single video, in
    that order of preference.
    """
    item_id = self._match_id(url)
    normalised = compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com')
    url = compat_urlparse.urlunparse(normalised)

    # This is not matched in a channel page with a tab selected
    match = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
    groups = match.groupdict() if match else {}
    if groups and not groups.get('not_channel'):
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        url = '%s/videos%s' % (groups.get('pre'), groups.get('post') or '')

    # Handle both video/playlist URLs
    qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    is_watch_url = (groups.get('not_channel') or '').startswith('watch')
    if is_watch_url and not video_id:
        if not playlist_id:
            # If there is neither video or playlist ids,
            # youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id

    if video_id and playlist_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    reloaded = self._reload_with_unavailable_videos(item_id, data, webpage)
    data = reloaded or data

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    current_video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str)
    video_id = current_video_id or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
    return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 3488
c5e8d7af 3489
class YoutubePlaylistIE(InfoExtractor):
    """Matches bare playlist ids and playlist URLs and hands them to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline every URL that YoutubeTabIE can already handle."""
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Redirect to the /playlist URL, keeping the original query if there is one."""
        playlist_id = self._match_id(url)
        # A bare playlist id has no query string of its own
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) or {'list': playlist_id}
        target_url = update_url_query('https://www.youtube.com/playlist', qs)
        return self.url_result(
            target_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3564
3565
class YoutubeYtBeIE(InfoExtractor):
    """Turns youtu.be links that carry a playlist id into regular watch URLs."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rebuild the equivalent youtube.com/watch URL and pass it to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3604
3605
class YoutubeYtUserIE(InfoExtractor):
    """Expands the ``ytuser:<name>`` shorthand to the user's page URL."""

    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Hand the user page over to YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3619
b05654f0 3620
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Maps the ``:ytfav`` keyword family to the ``LL`` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed playlist URL."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
3638
3639
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor driven by the ``ytsearch`` keyword."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to ``n`` video results for ``query``, one result page at a time."""
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        yielded = 0
        for page_num in itertools.count(1):
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                return
            # The first page and the continuation pages keep the sections in different places
            sections = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation_token = None
            for section in sections:
                if continuation_token is None:
                    continuation_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                section_items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list) or []
                for section_item in section_items:
                    if not isinstance(section_item, dict):
                        continue
                    video = section_item.get('videoRenderer')
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not continuation_token:
                return
            data['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 3709
c9ae7b95 3710
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE with its own search key and search params."""

    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3716
c9ae7b95 3717
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Runs a search from a youtube.com/results URL."""

    IE_NAME = '%s_url' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own _VALID_URL pattern."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Take the query and the optional ``sp`` value from the URL and run the search."""
        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        search_terms = qs.get('search_query') or qs.get('q')
        query = search_terms[0]
        # Stored on the instance, where _entries reads it
        self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3743
3744
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors.
    Each subclass has to provide a _FEED_NAME attribute.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name, derived from the feed name."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in on initialization."""
        self._login()

    def _real_extract(self, url):
        """Delegate the feed's URL to YoutubeTabIE."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3764
3765
class YoutubeWatchLaterIE(InfoExtractor):
    """Maps the ``:ytwatchlater`` keyword to the ``WL`` playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 3778
3779
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'recommended'; also matches the bare youtube.com front page."""

    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 3794
1ed5b5c9 3795
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'subscriptions', reached through the ``:ytsubs`` keywords."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 3807
1ed5b5c9 3808
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'history', reached through the ``:ythis`` keywords."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
3817
3818
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Matches watch/attribution URLs that lack a video id and explains the likely cause."""

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with a hint about shell quoting."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
3866
3867
class YoutubeTruncatedIDIE(InfoExtractor):
    """Matches watch URLs whose video id has only 1 to 10 characters."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)