return compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+# Any clients starting with _ cannot be explicitly requested by the user
+INNERTUBE_CLIENTS = {
+ 'web': {
+ 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'WEB',
+ 'clientVersion': '2.20210622.10.00',
+ }
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
+ },
+ 'web_embedded': {
+ 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'WEB_EMBEDDED_PLAYER',
+ 'clientVersion': '1.20210620.0.1',
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
+ },
+ 'web_music': {
+ 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
+ 'INNERTUBE_HOST': 'music.youtube.com',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'WEB_REMIX',
+ 'clientVersion': '1.20210621.00.00',
+ }
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
+ },
+ 'android': {
+ 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'ANDROID',
+ 'clientVersion': '16.20',
+ }
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
+ },
+ 'android_embedded': {
+ 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'ANDROID_EMBEDDED_PLAYER',
+ 'clientVersion': '16.20',
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 55
+ },
+ 'android_music': {
+ 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
+ 'INNERTUBE_HOST': 'music.youtube.com',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'ANDROID_MUSIC',
+ 'clientVersion': '4.32',
+ }
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
+ },
+ # ios has HLS live streams
+ # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
+ 'ios': {
+ 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'IOS',
+ 'clientVersion': '16.20',
+ }
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 5
+ },
+ 'ios_embedded': {
+ 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'IOS_MESSAGES_EXTENSION',
+ 'clientVersion': '16.20',
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 66
+ },
+ 'ios_music': {
+ 'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
+ 'INNERTUBE_HOST': 'music.youtube.com',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'IOS_MUSIC',
+ 'clientVersion': '4.32',
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 26
+ },
+ # mweb has 'ultralow' formats
+ # See: https://github.com/yt-dlp/yt-dlp/pull/557
+ 'mweb': {
+ 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'MWEB',
+ 'clientVersion': '2.20210721.07.00',
+ }
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
+ },
+}
+
+
+def build_innertube_clients():  # Fills in defaults and a 'priority' for every INNERTUBE_CLIENTS entry (in place) and adds *_agegate variants
+ third_party = {  # the same dict object is referenced by every agegate/embedded entry below
+ 'embedUrl': 'https://google.com', # Can be any valid URL
+ }
+ base_clients = ('android', 'web', 'ios', 'mweb')
+ priority = qualities(base_clients[::-1])  # NOTE(review): presumably ranks names earlier in base_clients higher - confirm against qualities()
+
+ for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):  # tuple() snapshot, because new keys are inserted into the dict inside this loop
+ ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')  # only applied when the entry has no key of its own
+ ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')  # music clients already set their own host
+ ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
+ ytcfg['priority'] = 10 * priority(client.split('_', 1)[0])  # ranked by the part of the name before the first '_'
+
+ if client in base_clients:
+ INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)  # independent copy registered under '<client>_agegate'
+ agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
+ agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
+ agegate_ytcfg['priority'] -= 1  # agegate variant sits just below its base client
+ elif client.endswith('_embedded'):
+ ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
+ ytcfg['priority'] -= 2  # embedded variants sit below the agegate variant
+ else:
+ ytcfg['priority'] -= 3  # every remaining variant (e.g. *_music) gets the largest offset
+
+
+build_innertube_clients()
+
+
class YoutubeBaseInfoExtractor(InfoExtractor):
"""Provide base functions for Youtube extractors"""
- _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
- _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
-
- _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
- _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
- _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
_RESERVED_NAMES = (
- r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
- r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
+ r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
+ r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
+ r'browse|oembed|get_video_info|iframe_api|s/player|'
r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
+ _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
+
_NETRC_MACHINE = 'youtube'
+
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
- _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
+ r''' # Unused since login is broken
+ _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
+ _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
+
+ _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
+ _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
+ _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
+ '''
def _login(self):
"""
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
- _YT_DEFAULT_YTCFGS = {
- 'WEB': {
- 'INNERTUBE_API_VERSION': 'v1',
- 'INNERTUBE_CLIENT_NAME': 'WEB',
- 'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
- 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
- 'INNERTUBE_CONTEXT': {
- 'client': {
- 'clientName': 'WEB',
- 'clientVersion': '2.20210622.10.00',
- 'hl': 'en',
- }
- },
- 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
- },
- 'WEB_REMIX': {
- 'INNERTUBE_API_VERSION': 'v1',
- 'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
- 'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
- 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
- 'INNERTUBE_CONTEXT': {
- 'client': {
- 'clientName': 'WEB_REMIX',
- 'clientVersion': '1.20210621.00.00',
- 'hl': 'en',
- }
- },
- 'INNERTUBE_CONTEXT_CLIENT_NAME': 67
- },
- 'WEB_EMBEDDED_PLAYER': {
- 'INNERTUBE_API_VERSION': 'v1',
- 'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
- 'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
- 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
- 'INNERTUBE_CONTEXT': {
- 'client': {
- 'clientName': 'WEB_EMBEDDED_PLAYER',
- 'clientVersion': '1.20210620.0.1',
- 'hl': 'en',
- }
- },
- 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
- },
- 'ANDROID': {
- 'INNERTUBE_API_VERSION': 'v1',
- 'INNERTUBE_CLIENT_NAME': 'ANDROID',
- 'INNERTUBE_CLIENT_VERSION': '16.20',
- 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
- 'INNERTUBE_CONTEXT': {
- 'client': {
- 'clientName': 'ANDROID',
- 'clientVersion': '16.20',
- 'hl': 'en',
- }
- },
- 'INNERTUBE_CONTEXT_CLIENT_NAME': 3
- },
- 'ANDROID_EMBEDDED_PLAYER': {
- 'INNERTUBE_API_VERSION': 'v1',
- 'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
- 'INNERTUBE_CLIENT_VERSION': '16.20',
- 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
- 'INNERTUBE_CONTEXT': {
- 'client': {
- 'clientName': 'ANDROID_EMBEDDED_PLAYER',
- 'clientVersion': '16.20',
- 'hl': 'en',
- }
- },
- 'INNERTUBE_CONTEXT_CLIENT_NAME': 55
- },
- 'ANDROID_MUSIC': {
- 'INNERTUBE_API_VERSION': 'v1',
- 'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
- 'INNERTUBE_CLIENT_VERSION': '4.32',
- 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
- 'INNERTUBE_CONTEXT': {
- 'client': {
- 'clientName': 'ANDROID_MUSIC',
- 'clientVersion': '4.32',
- 'hl': 'en',
- }
- },
- 'INNERTUBE_CONTEXT_CLIENT_NAME': 21
- },
- 'IOS': {
- 'INNERTUBE_API_VERSION': 'v1',
- 'INNERTUBE_CLIENT_NAME': 'IOS',
- 'INNERTUBE_CLIENT_VERSION': '16.20',
- 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
- 'INNERTUBE_CONTEXT': {
- 'client': {
- 'clientName': 'IOS',
- 'clientVersion': '16.20',
- 'hl': 'en',
- }
- },
- 'INNERTUBE_CONTEXT_CLIENT_NAME': 5
-
- },
- 'IOS_MUSIC': {
- 'INNERTUBE_API_VERSION': 'v1',
- 'INNERTUBE_CLIENT_NAME': 'IOS_MUSIC',
- 'INNERTUBE_CLIENT_VERSION': '4.32',
- 'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
- 'INNERTUBE_CONTEXT': {
- 'client': {
- 'clientName': 'IOS_MUSIC',
- 'clientVersion': '4.32',
- 'hl': 'en',
- }
- },
- 'INNERTUBE_CONTEXT_CLIENT_NAME': 26
- },
- 'IOS_MESSAGES_EXTENSION': {
- 'INNERTUBE_API_VERSION': 'v1',
- 'INNERTUBE_CLIENT_NAME': 'IOS_MESSAGES_EXTENSION',
- 'INNERTUBE_CLIENT_VERSION': '16.20',
- 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
- 'INNERTUBE_CONTEXT': {
- 'client': {
- 'clientName': 'IOS_MESSAGES_EXTENSION',
- 'clientVersion': '16.20',
- 'hl': 'en',
- }
- },
- 'INNERTUBE_CONTEXT_CLIENT_NAME': 66
- }
- }
-
- _YT_DEFAULT_INNERTUBE_HOSTS = {
- 'DIRECT': 'youtubei.googleapis.com',
- 'WEB': 'www.youtube.com',
- 'WEB_REMIX': 'music.youtube.com',
- 'ANDROID_MUSIC': 'music.youtube.com'
- }
+ def _get_default_ytcfg(self, client='web'):  # Built-in ytcfg for the given INNERTUBE_CLIENTS key; an unknown key raises KeyError (no fallback)
+ return copy.deepcopy(INNERTUBE_CLIENTS[client])  # deep copy so callers cannot mutate the shared module-level entry
- # clients starting with _ cannot be explicity requested by the user
- _YT_CLIENTS = {
- 'web': 'WEB',
- 'web_music': 'WEB_REMIX',
- '_web_embedded': 'WEB_EMBEDDED_PLAYER',
- '_web_agegate': 'TVHTML5',
- 'android': 'ANDROID',
- 'android_music': 'ANDROID_MUSIC',
- '_android_embedded': 'ANDROID_EMBEDDED_PLAYER',
- '_android_agegate': 'ANDROID',
- 'ios': 'IOS',
- 'ios_music': 'IOS_MUSIC',
- '_ios_embedded': 'IOS_MESSAGES_EXTENSION',
- '_ios_agegate': 'IOS'
- }
-
- def _get_default_ytcfg(self, client='WEB'):
- if client in self._YT_DEFAULT_YTCFGS:
- return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
- self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
- return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
+ def _get_innertube_host(self, client='web'):  # Host name stored for the given INNERTUBE_CLIENTS key
+ return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']  # plain subscripting: an unknown client raises KeyError
- def _get_innertube_host(self, client='WEB'):
- return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, (client, 'WEB'))
-
- def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
+ def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
# try_get but with fallback to default ytcfg client values when present
_func = lambda y: try_get(y, getter, expected_type)
return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
- def _extract_client_name(self, ytcfg, default_client='WEB'):
- return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str, default_client)
+ def _extract_client_name(self, ytcfg, default_client='web'):  # Client name from ytcfg, else from the default ytcfg of default_client
+ return self._ytcfg_get_safe(
+ ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],  # top-level key; NOTE(review): getters are presumably tried in this order - confirm in try_get
+ lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)  # alternative location inside the context
@staticmethod
def _extract_session_index(*data):
if session_index is not None:
return session_index
- def _extract_client_version(self, ytcfg, default_client='WEB'):
- return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str, default_client)
+ def _extract_client_version(self, ytcfg, default_client='web'):  # Client version from ytcfg, else from the default ytcfg of default_client
+ return self._ytcfg_get_safe(
+ ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],  # top-level key; NOTE(review): getters are presumably tried in this order - confirm in try_get
+ lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)  # alternative location inside the context
- def _extract_api_key(self, ytcfg=None, default_client='WEB'):
+ def _extract_api_key(self, ytcfg=None, default_client='web'):
return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
- def _extract_context(self, ytcfg=None, default_client='WEB'):
+ def _extract_context(self, ytcfg=None, default_client='web'):
_get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
context = _get_context(ytcfg)
if context:
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
note='Downloading API JSON', errnote='Unable to download API page',
- context=None, api_key=None, api_hostname=None, default_client='WEB'):
+ context=None, api_key=None, api_hostname=None, default_client='web'):
data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
data.update(query)
def generate_api_headers(
self, ytcfg=None, identity_token=None, account_syncid=None,
- visitor_data=None, api_hostname=None, default_client='WEB', session_index=None):
+ visitor_data=None, api_hostname=None, default_client='web', session_index=None):
origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
headers = {
'X-YouTube-Client-Name': compat_str(
alert_type = alert.get('type')
if not alert_type:
continue
- message = cls._get_text(alert.get('text'))
+ message = cls._get_text(alert, 'text')
if message:
yield alert_type, message
return badges
@staticmethod
- def _get_text(data, getter=None, max_runs=None):
- for get in variadic(getter):
- d = try_get(data, get) if get is not None else data
- text = try_get(d, lambda x: x['simpleText'], compat_str)
- if text:
- return text
- runs = try_get(d, lambda x: x['runs'], list) or []
- if not runs and isinstance(d, list):
- runs = d
-
- def get_runs(runs):
- for run in runs[:min(len(runs), max_runs or len(runs))]:
- yield try_get(run, lambda x: x['text'], compat_str) or ''
-
- text = ''.join(get_runs(runs))
- if text:
- return text
+ def _get_text(data, *path_list, max_runs=None):  # First non-empty text found under any path in path_list (or in data itself); falls through to None
+ for path in path_list or [None]:  # no path given: a single pass with path=None inspects data directly
+ if path is None:
+ obj = [data]
+ else:
+ obj = traverse_obj(data, path, default=[])
+ if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):  # NOTE(review): such keys presumably make traverse_obj return a list of matches - confirm
+ obj = [obj]  # otherwise treat the result as one candidate
+ for item in obj:
+ text = try_get(item, lambda x: x['simpleText'], compat_str)  # 'simpleText' is checked before 'runs'
+ if text:
+ return text
+ runs = try_get(item, lambda x: x['runs'], list) or []
+ if not runs and isinstance(item, list):
+ runs = item  # a bare list is treated as the runs themselves
+
+ runs = runs[:min(len(runs), max_runs or len(runs))]  # keep at most max_runs entries; a falsy max_runs keeps all
+ text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))  # concatenate the 'text' of each kept run
+ if text:
+ return text
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
- default_client='WEB'):
+ default_client='web'):
response = None
last_error = None
count = -1
def _extract_video(self, renderer):
video_id = renderer.get('videoId')
- title = self._get_text(renderer.get('title'))
- description = self._get_text(renderer.get('descriptionSnippet'))
- duration = parse_duration(self._get_text(renderer.get('lengthText')))
- view_count_text = self._get_text(renderer.get('viewCountText')) or ''
+ title = self._get_text(renderer, 'title')
+ description = self._get_text(renderer, 'descriptionSnippet')
+ duration = parse_duration(self._get_text(
+ renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
+ view_count_text = self._get_text(renderer, 'viewCountText') or ''
view_count = str_to_int(self._search_regex(
r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
'view count', default=None))
- uploader = self._get_text(renderer, (lambda x: x['ownerText'], lambda x: x['shortBylineText']))
+ uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
return {
'_type': 'url',
_AGE_GATE_REASONS = (
'Sign in to confirm your age',
'This video may be inappropriate for some users.',
- 'Sorry, this content is age-restricted.')
+ 'Sorry, this content is age-restricted.',
+ 'Please confirm your age.')
+
+ _AGE_GATE_STATUS_REASONS = (
+ 'AGE_VERIFICATION_REQUIRED',
+ 'AGE_CHECK_REQUIRED'
+ )
_GEO_BYPASS = False
'format': '141/bestaudio[ext=m4a]',
},
},
- # Normal age-gate video (embed allowed)
+ # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
{
+ 'note': 'Embed allowed age-gate video',
'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
'info_dict': {
'id': 'HtVdAasjOgU',
'age_limit': 18,
},
},
+ {
+ 'note': 'Age-gate video with embed allowed in public site',
+ 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
+ 'info_dict': {
+ 'id': 'HsUATh_Nc2U',
+ 'ext': 'mp4',
+ 'title': 'Godzilla 2 (Official Video)',
+ 'description': 'md5:bf77e03fcae5529475e500129b05668a',
+ 'upload_date': '20200408',
+ 'uploader_id': 'FlyingKitty900',
+ 'uploader': 'FlyingKitty',
+ 'age_limit': 18,
+ },
+ },
+ {
+ 'note': 'Age-gate video embedable only with clientScreen=EMBED',
+ 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
+ 'info_dict': {
+ 'id': 'Tq92D6wQ1mg',
+ 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
+ 'ext': 'mp4',
+ 'upload_date': '20191227',
+ 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
+ 'uploader': 'Projekt Melody',
+ 'description': 'md5:17eccca93a786d51bc67646756894066',
+ 'age_limit': 18,
+ },
+ },
+ {
+ 'note': 'Non-Agegated non-embeddable video',
+ 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
+ 'info_dict': {
+ 'id': 'MeJVWBSsPAY',
+ 'ext': 'mp4',
+ 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
+ 'uploader': 'Herr Lurik',
+ 'uploader_id': 'st3in234',
+ 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
+ 'upload_date': '20130730',
+ },
+ },
+ {
+ 'note': 'Non-bypassable age-gated video',
+ 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
+ 'only_matching': True,
+ },
# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
# YouTube Red ad is not captured for creator
{
'params': {
'skip_download': True,
},
+ 'skip': 'Not multifeed anymore',
},
{
# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
data,
('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
expected_type=list, default=[])
- chapter_time = lambda chapter: parse_duration(self._get_text(chapter.get('timeDescription')))
- chapter_title = lambda chapter: self._get_text(chapter.get('title'))
+ chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
+ chapter_title = lambda chapter: self._get_text(chapter, 'title')
return next((
filter(None, (
if not comment_id:
return
- text = self._get_text(comment_renderer.get('contentText'))
+ text = self._get_text(comment_renderer, 'contentText')
# note: timestamp is an estimate calculated from the current time and time_text
- time_text = self._get_text(comment_renderer.get('publishedTimeText')) or ''
+ time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
time_text_dt = self.parse_time_text(time_text)
if isinstance(time_text_dt, datetime.datetime):
timestamp = calendar.timegm(time_text_dt.timetuple())
- author = self._get_text(comment_renderer.get('authorText'))
+ author = self._get_text(comment_renderer, 'authorText')
author_id = try_get(comment_renderer,
lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
for content in contents:
comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
expected_comment_count = parse_count(self._get_text(
- comments_header_renderer, (lambda x: x['countText'], lambda x: x['commentsCount']), max_runs=1))
+ comments_header_renderer, 'countText', 'commentsCount', max_runs=1))
if expected_comment_count:
comment_counts[1] = expected_comment_count
known_entry_comment_renderers = ('itemSectionRenderer',)
estimated_total = 0
max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
-
+ # Force English regardless of account setting to prevent parsing issues
+ # See: https://github.com/yt-dlp/yt-dlp/issues/532
+ ytcfg = copy.deepcopy(ytcfg)
+ traverse_obj(
+ ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en'
try:
for comment in _real_comment_extract(contents):
if len(comments) >= max_comments:
'racyCheckOk': True
}
- @staticmethod
- def _get_video_info_params(video_id, client='TVHTML5'):
- GVI_CLIENTS = {
- 'ANDROID': {
- 'c': 'ANDROID',
- 'cver': '16.20',
- },
- 'TVHTML5': {
- 'c': 'TVHTML5',
- 'cver': '6.20180913',
- },
- 'IOS': {
- 'c': 'IOS',
- 'cver': '16.20'
- }
- }
- query = {
- 'video_id': video_id,
- 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
- 'html5': '1'
- }
- query.update(GVI_CLIENTS.get(client))
- return query
+ def _is_agegated(self, player_response):  # True when playabilityStatus in player_response indicates an age gate, else False
+ reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])  # NOTE(review): presumably yields both the 'status' and 'reason' values - confirm in traverse_obj
+ for reason in reasons:
+ if reason in self._AGE_GATE_REASONS + self._AGE_GATE_STATUS_REASONS:  # matched against both the message tuple and the status-code tuple
+ return True
+ if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')) is not None:  # any non-None value for this key also counts
+ return True
+ return False
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
headers = self.generate_api_headers(
player_ytcfg, identity_token, syncid,
- default_client=self._YT_CLIENTS[client], session_index=session_index)
+ default_client=client, session_index=session_index)
yt_query = {'videoId': video_id}
yt_query.update(self._generate_player_context(sts))
return self._extract_response(
item_id=video_id, ep='player', query=yt_query,
ytcfg=player_ytcfg, headers=headers, fatal=False,
- default_client=self._YT_CLIENTS[client],
+ default_client=client,
note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
) or None
- def _extract_age_gated_player_response(self, client, video_id, ytcfg, identity_token, player_url, initial_pr):
- # get_video_info endpoint seems to be completely dead
- gvi_client = None # self._YT_CLIENTS.get(f'_{client}_agegate')
- if gvi_client:
- pr = self._parse_json(traverse_obj(
- compat_parse_qs(self._download_webpage(
- self.http_scheme() + '//www.youtube.com/get_video_info', video_id,
- 'Refetching age-gated %s info webpage' % gvi_client.lower(),
- 'unable to download video info webpage', fatal=False,
- query=self._get_video_info_params(video_id, client=gvi_client))),
- ('player_response', 0), expected_type=str) or '{}', video_id)
- if pr:
- return pr
- self.report_warning('Falling back to embedded-only age-gate workaround')
-
- if not self._YT_CLIENTS.get(f'_{client}_embedded'):
- return
- embed_webpage = None
- if client == 'web' and 'configs' not in self._configuration_arg('player_skip'):
- embed_webpage = self._download_webpage(
- 'https://www.youtube.com/embed/%s?html5=1' % video_id,
- video_id=video_id, note=f'Downloading age-gated {client} embed config')
-
- ytcfg_age = self.extract_ytcfg(video_id, embed_webpage) or {}
- # If we extracted the embed webpage, it'll tell us if we can view the video
- embedded_pr = self._parse_json(
- traverse_obj(ytcfg_age, ('PLAYER_VARS', 'embedded_player_response'), expected_type=str) or '{}',
- video_id=video_id)
- embedded_ps_reason = traverse_obj(embedded_pr, ('playabilityStatus', 'reason'), expected_type=str) or ''
- if embedded_ps_reason in self._AGE_GATE_REASONS:
- return
- return self._extract_player_response(
- f'_{client}_embedded', video_id,
- ytcfg_age or ytcfg, ytcfg_age if client == 'web' else {},
- identity_token, player_url, initial_pr)
-
def _get_requested_clients(self, url, smuggled_data):
- requested_clients = [client for client in self._configuration_arg('player_client')
- if client[:0] != '_' and client in self._YT_CLIENTS]
+ requested_clients = []
+ allowed_clients = sorted(
+ [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
+ key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
+ for client in self._configuration_arg('player_client'):
+ if client in allowed_clients:
+ requested_clients.append(client)
+ elif client == 'all':
+ requested_clients.extend(allowed_clients)
+ else:
+ self.report_warning(f'Skipping unsupported client {client}')
if not requested_clients:
requested_clients = ['android', 'web']
return orderedSet(requested_clients)
+ def _extract_player_ytcfg(self, client, video_id):  # Downloads a client-specific page and returns the ytcfg extracted from it, or {} if there is none
+ url = {
+ 'web_music': 'https://music.youtube.com',
+ 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
+ }.get(client)  # only these two clients have a config page listed here
+ if not url:
+ return {}  # no page to fetch for any other client
+ webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')  # NOTE(review): with fatal=False a failed download presumably yields a falsy value - confirm extract_ytcfg accepts it
+ return self.extract_ytcfg(video_id, webpage) or {}  # falsy extraction result is normalised to {}
+
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
initial_pr = None
if webpage:
webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
video_id, 'initial player response')
- for client in clients:
+ original_clients = clients
+ clients = clients[::-1]
+ while clients:
+ client = clients.pop()
player_ytcfg = master_ytcfg if client == 'web' else {}
- if client == 'web' and initial_pr:
- pr = initial_pr
- else:
- if client == 'web_music' and 'configs' not in self._configuration_arg('player_skip'):
- ytm_webpage = self._download_webpage(
- 'https://music.youtube.com',
- video_id, fatal=False, note='Downloading remix client config')
- player_ytcfg = self.extract_ytcfg(video_id, ytm_webpage) or {}
- pr = self._extract_player_response(
- client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr)
+ if 'configs' not in self._configuration_arg('player_skip'):
+ player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
+
+ pr = (
+ initial_pr if client == 'web' and initial_pr
+ else self._extract_player_response(
+ client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr))
if pr:
yield pr
- if traverse_obj(pr, ('playabilityStatus', 'reason')) in self._AGE_GATE_REASONS:
- pr = self._extract_age_gated_player_response(
- client, video_id, player_ytcfg or master_ytcfg, identity_token, player_url, initial_pr)
- if pr:
- yield pr
+
+ if self._is_agegated(pr):
+ client = f'{client}_agegate'
+ if client in INNERTUBE_CLIENTS and client not in original_clients:
+ clients.append(client)
+
# Android player_response does not have microFormats which are needed for
# extraction of some data. So we return the initial_pr with formats
# stripped out even if not requested by the user
# See: https://github.com/yt-dlp/yt-dlp/issues/501
- if initial_pr and 'web' not in clients:
+ if initial_pr and 'web' not in original_clients:
initial_pr['streamingData'] = None
yield initial_pr
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
itags, stream_ids = [], []
- itag_qualities = {}
+ itag_qualities, res_qualities = {}, {}
q = qualities([
- # "tiny" is the smallest video-only format. But some audio-only formats
- # was also labeled "tiny". It is not clear if such formats still exist
- 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
+ # Normally "tiny" is the smallest video-only format. But
+ # audio-only formats with unknown quality may get tagged as "tiny"
+ 'tiny',
+ 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
])
streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
continue
quality = fmt.get('quality')
+ height = int_or_none(fmt.get('height'))
if quality == 'tiny' or not quality:
quality = fmt.get('audioQuality', '').lower() or quality
- if itag and quality:
- itag_qualities[itag] = quality
+ # The 3gp format (17) in android client has a quality of "small",
+ # but is actually worse than other formats
+ if itag == '17':
+ quality = 'tiny'
+ if quality:
+ if itag:
+ itag_qualities[itag] = quality
+ if height:
+ res_qualities[height] = quality
# FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
# (adding `&sq=0` to the URL) and parsing emsg box to determine the
# number of fragment that would subsequently requested with (`&sq=N`)
'filesize': int_or_none(fmt.get('contentLength')),
'format_id': itag,
'format_note': ', '.join(filter(None, (
- audio_track.get('displayName'), fmt.get('qualityLabel') or quality))),
+ audio_track.get('displayName'),
+ fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))),
'fps': int_or_none(fmt.get('fps')),
- 'height': int_or_none(fmt.get('height')),
+ 'height': height,
'quality': q(quality),
'tbr': tbr,
'url': fmt_url,
- 'width': fmt.get('width'),
+ 'width': int_or_none(fmt.get('width')),
'language': audio_track.get('id', '').split('.')[0],
}
mime_mobj = re.match(
if mime_mobj:
dct['ext'] = mimetype2ext(mime_mobj.group(1))
dct.update(parse_codecs(mime_mobj.group(2)))
- # The 3gp format in android client has a quality of "small",
- # but is actually worse than all other formats
- if dct['ext'] == '3gp':
- dct['quality'] = q('tiny')
- dct['preference'] = -10
no_audio = dct.get('acodec') == 'none'
no_video = dct.get('vcodec') == 'none'
if no_audio:
get_dash = not is_live and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
+ def guess_quality(f):  # Quality rank for a manifest format f, reusing qualities recorded in itag_qualities / res_qualities
+ for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):  # the format_id lookup is tried before the height lookup
+ if val in qdict:
+ return q(qdict[val])
+ return -1  # neither the format_id nor the height has a recorded quality
+
for sd in streaming_data:
hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
if hls_manifest_url:
- for f in self._extract_m3u8_formats(
- hls_manifest_url, video_id, 'mp4', fatal=False):
+ for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
itag = self._search_regex(
r'/itag/(\d+)', f['url'], 'itag', default=None)
if itag in itags:
if itag:
f['format_id'] = itag
itags.append(itag)
+ f['quality'] = guess_quality(f)
yield f
dash_manifest_url = get_dash and sd.get('dashManifestUrl')
if dash_manifest_url:
- for f in self._extract_mpd_formats(
- dash_manifest_url, video_id, fatal=False):
+ for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
itag = f['format_id']
if itag in itags:
continue
if itag:
itags.append(itag)
- if itag in itag_qualities:
- f['quality'] = q(itag_qualities[itag])
+ f['quality'] = guess_quality(f)
filesize = int_or_none(self._search_regex(
r'/clen/(\d+)', f.get('fragment_base_url')
or f['url'], 'file size', default=None))
self.raise_no_formats(reason, expected=True)
for f in formats:
- # TODO: detect if throttled
- if '&n=' in f['url']: # possibly throttled
+ if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled
f['source_preference'] = -10
- # note = f.get('format_note')
- # f['format_note'] = f'{note} (throttled)' if note else '(throttled)'
+ # TODO: this method is not reliable
+ f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)'
- self._sort_formats(formats)
+ # Source is given priority since formats that throttle are given lower source_preference
+ # When throttling issue is fully fixed, remove this
+ self._sort_formats(formats, ('quality', 'height', 'fps', 'source'))
keywords = get_first(video_details, 'keywords', expected_type=list) or []
if not keywords and webpage:
'release_timestamp': live_starttime,
}
- pctr = get_first(player_responses, ('captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
+ pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
+ # Converted into dicts to remove duplicates
+ captions = {
+ sub.get('baseUrl'): sub
+ for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
+ translation_languages = {
+ lang.get('languageCode'): lang.get('languageName')
+ for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
subtitles = {}
if pctr:
def process_language(container, base_url, lang_code, sub_name, query):
'name': sub_name,
})
- for caption_track in (pctr.get('captionTracks') or []):
- base_url = caption_track.get('baseUrl')
+ for base_url, caption_track in captions.items():
if not base_url:
continue
if caption_track.get('kind') != 'asr':
continue
process_language(
subtitles, base_url, lang_code,
- try_get(caption_track, lambda x: x['name']['simpleText']),
+ traverse_obj(caption_track, ('name', 'simpleText')),
{})
continue
automatic_captions = {}
- for translation_language in (pctr.get('translationLanguages') or []):
- translation_language_code = translation_language.get('languageCode')
- if not translation_language_code:
+ for trans_code, trans_name in translation_languages.items():
+ if not trans_code:
continue
process_language(
- automatic_captions, base_url, translation_language_code,
- self._get_text(translation_language.get('languageName'), max_runs=1),
- {'tlang': translation_language_code})
+ automatic_captions, base_url, trans_code,
+ self._get_text(trans_name, max_runs=1),
+ {'tlang': trans_code})
info['automatic_captions'] = automatic_captions
info['subtitles'] = subtitles
})
vsir = content.get('videoSecondaryInfoRenderer')
if vsir:
- info['channel'] = self._get_text(try_get(
- vsir,
- lambda x: x['owner']['videoOwnerRenderer']['title'],
- dict))
+ info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
rows = try_get(
vsir,
lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
mrr_title = mrr.get('title')
if not mrr_title:
continue
- mrr_title = self._get_text(mrr['title'])
- mrr_contents_text = self._get_text(mrr['contents'][0])
+ mrr_title = self._get_text(mrr, 'title')
+ mrr_contents_text = self._get_text(mrr, ('contents', 0))
if mrr_title == 'License':
info['license'] = mrr_contents_text
elif not multiple_songs:
renderer = self._extract_basic_item_renderer(item)
if not isinstance(renderer, dict):
continue
- title = self._get_text(renderer.get('title'))
+ title = self._get_text(renderer, 'title')
# playlist
playlist_id = renderer.get('playlistId')
# will not work
if skip_channels and '/channels?' in shelf_url:
return
- title = self._get_text(shelf_renderer, lambda x: x['title'])
+ title = self._get_text(shelf_renderer, 'title')
yield self.url_result(shelf_url, video_title=title)
# Shelf may not contain shelf URL, fallback to extraction from content
for entry in self._shelf_entries_from_content(shelf_renderer):
renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
if not is_selected:
continue
- label = self._get_text(
- try_get(renderer_dict, lambda x: x['privacyDropdownItemRenderer']['label'], dict) or [])
+ label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
if label:
badge_labels.add(label.lower())
break