parse_count,
parse_duration,
parse_iso8601,
+ parse_qs,
qualities,
+ remove_end,
remove_start,
smuggle_url,
str_or_none,
unsmuggle_url,
update_url_query,
url_or_none,
- urlencode_postdata,
urljoin,
variadic,
)
-def parse_qs(url):
- return compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
-
-
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
'web': {
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
+ 'REQUIRE_JS_PLAYER': False
},
'android_embedded': {
'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
'clientVersion': '16.20',
},
},
- 'INNERTUBE_CONTEXT_CLIENT_NAME': 55
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
+ 'REQUIRE_JS_PLAYER': False
},
'android_music': {
'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
+ 'REQUIRE_JS_PLAYER': False
},
'android_creator': {
'INNERTUBE_CONTEXT': {
'clientVersion': '21.24.100',
},
},
- 'INNERTUBE_CONTEXT_CLIENT_NAME': 14
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
+ 'REQUIRE_JS_PLAYER': False
},
# ios has HLS live streams
# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
'clientVersion': '16.20',
}
},
- 'INNERTUBE_CONTEXT_CLIENT_NAME': 5
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
+ 'REQUIRE_JS_PLAYER': False
},
'ios_embedded': {
'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
'clientVersion': '16.20',
},
},
- 'INNERTUBE_CONTEXT_CLIENT_NAME': 66
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
+ 'REQUIRE_JS_PLAYER': False
},
'ios_music': {
'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
'clientVersion': '4.32',
},
},
- 'INNERTUBE_CONTEXT_CLIENT_NAME': 26
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
+ 'REQUIRE_JS_PLAYER': False
},
'ios_creator': {
'INNERTUBE_CONTEXT': {
'clientVersion': '21.24.100',
},
},
- 'INNERTUBE_CONTEXT_CLIENT_NAME': 15
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
+ 'REQUIRE_JS_PLAYER': False
},
# mweb has 'ultralow' formats
# See: https://github.com/yt-dlp/yt-dlp/pull/557
for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
+ ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
ytcfg['priority'] = 10 * priority(client.split('_', 1)[0])
if message:
yield alert_type, message
- def _report_alerts(self, alerts, expected=True, fatal=True):
+ def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
errors = []
warnings = []
for alert_type, alert_message in alerts:
warnings.append([alert_type, alert_message])
for alert_type, alert_message in (warnings + errors[:-1]):
- self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
+ self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
if errors:
raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
while count < retries:
count += 1
if last_error:
- self.report_warning('%s. Retrying ...' % last_error)
+ self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
try:
response = self._call_api(
ep=ep, fatal=True, headers=headers,
# We also want to catch all other network exceptions since errors in later pages can be troublesome
# See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
- last_error = error_to_compat_str(e.cause or e)
+ last_error = error_to_compat_str(e.cause or e.msg)
if count < retries:
continue
if fatal:
else:
# YouTube may send alerts if there was an issue with the continuation page
try:
- self._extract_and_report_alerts(response, expected=False)
+ self._extract_and_report_alerts(response, expected=False, only_once=True)
except ExtractorError as e:
+ # YouTube servers may return errors we want to retry on in a 200 OK response
+ # See: https://github.com/yt-dlp/yt-dlp/issues/839
+ if 'unknown error' in e.msg.lower():
+ last_error = e.msg
+ continue
if fatal:
raise
self.report_warning(error_to_compat_str(e))
'_rtmp': {'protocol': 'rtmp'},
# av01 video only formats sometimes served with "unknown" codecs
- '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
- '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
- '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
- '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
+ '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
+ '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
+ '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
+ '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
+ '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
+ '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
+ '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
+ '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
}
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
@classmethod
def suitable(cls, url):
- # Hack for lazy extractors until more generic solution is implemented
- # (see #28780)
- from .youtube import parse_qs
+ from ..utils import parse_qs
+
qs = parse_qs(url)
if qs.get('list', [None])[0]:
return False
self._code_cache = {}
self._player_cache = {}
- def _extract_player_url(self, ytcfg=None, webpage=None):
- player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
- if not player_url and webpage:
- player_url = self._search_regex(
- r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
- webpage, 'player URL', fatal=False)
+ def _extract_player_url(self, *ytcfgs, webpage=None):
+ player_url = traverse_obj(
+ ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
+ get_all=False, expected_type=compat_str)
if not player_url:
- return None
+ return
if player_url.startswith('//'):
player_url = 'https:' + player_url
elif not re.match(r'https?://', player_url):
'https://www.youtube.com', player_url)
return player_url
+ def _download_player_url(self, video_id, fatal=False):
+ # Derive a player JS URL from the iframe API script: download it, pull the
+ # 8-hex-digit player version out of it and build the base.js URL from that.
+ # `fatal` is forwarded to both the download and the regex search.
+ # Falls through (implicitly returning None) when nothing was downloaded
+ # or no player version was matched.
+ res = self._download_webpage(
+ 'https://www.youtube.com/iframe_api',
+ note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
+ if res:
+ # `\\?` in the pattern allows an optional literal backslash before each slash
+ player_version = self._search_regex(
+ r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
+ if player_version:
+ return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
+
def _signature_cache_id(self, example_sig):
    """Return a string representation of a signature.

    The signature is split on '.', each part is replaced by its length,
    and the lengths are joined back together with '.'.
    """
    part_lengths = [compat_str(len(segment)) for segment in example_sig.split('.')]
    return '.'.join(part_lengths)
session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
- sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
+ sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
headers = self.generate_api_headers(
player_ytcfg, identity_token, syncid,
default_client=client, session_index=session_index)
webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
return self.extract_ytcfg(video_id, webpage) or {}
- def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
+ def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, identity_token):
initial_pr = None
if webpage:
initial_pr = self._extract_yt_initial_variable(
original_clients = clients
clients = clients[::-1]
+ prs = []
def append_client(client_name):
if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
# extraction of some data. So we return the initial_pr with formats
# stripped out even if not requested by the user
# See: https://github.com/yt-dlp/yt-dlp/issues/501
- yielded_pr = False
if initial_pr:
pr = dict(initial_pr)
pr['streamingData'] = None
- yielded_pr = True
- yield pr
+ prs.append(pr)
last_error = None
+ tried_iframe_fallback = False
+ player_url = None
while clients:
client = clients.pop()
player_ytcfg = master_ytcfg if client == 'web' else {}
if 'configs' not in self._configuration_arg('player_skip'):
player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
+ player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
+ require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
+ if 'js' in self._configuration_arg('player_skip'):
+ require_js_player = False
+ player_url = None
+
+ if not player_url and not tried_iframe_fallback and require_js_player:
+ player_url = self._download_player_url(video_id)
+ tried_iframe_fallback = True
+
try:
pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
- client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr)
+ client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url if require_js_player else None, initial_pr)
except ExtractorError as e:
if last_error:
self.report_warning(last_error)
continue
if pr:
- yielded_pr = True
- yield pr
+ prs.append(pr)
# creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
if client.endswith('_agegate') and self._is_unplayable(pr) and self._generate_sapisidhash_header():
append_client(f'{client}_agegate')
if last_error:
- if not yielded_pr:
+ if not len(prs):
raise last_error
self.report_warning(last_error)
+ return prs, player_url
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
itags, stream_ids = [], []
'filesize': int_or_none(fmt.get('contentLength')),
'format_id': itag,
'format_note': ', '.join(filter(None, (
- audio_track.get('displayName'),
+ '%s%s' % (audio_track.get('displayName') or '',
+ ' (default)' if audio_track.get('audioIsDefault') else ''),
fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))),
'fps': int_or_none(fmt.get('fps')),
'height': height,
'url': fmt_url,
'width': int_or_none(fmt.get('width')),
'language': audio_track.get('id', '').split('.')[0],
+ 'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
}
mime_mobj = re.match(
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
base_url = self.http_scheme() + '//www.youtube.com/'
webpage_url = base_url + 'watch?v=' + video_id
- webpage = self._download_webpage(
- webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
+ webpage = None
+ if 'webpage' not in self._configuration_arg('player_skip'):
+ webpage = self._download_webpage(
+ webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
- player_url = self._extract_player_url(master_ytcfg, webpage)
identity_token = self._extract_identity_token(webpage, video_id)
- player_responses = list(self._extract_player_responses(
+ player_responses, player_url = self._extract_player_responses(
self._get_requested_clients(url, smuggled_data),
- video_id, webpage, master_ytcfg, player_url, identity_token))
+ video_id, webpage, master_ytcfg, identity_token)
get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
if not formats:
if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
- self.raise_no_formats(
- 'This video is DRM protected.', expected=True)
+ self.report_drm(video_id)
pemr = get_first(
playability_statuses,
('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
# Source is given priority since formats that throttle are given lower source_preference
# When throttling issue is fully fixed, remove this
- self._sort_formats(formats, ('quality', 'height', 'fps', 'source'))
+ self._sort_formats(formats, ('quality', 'res', 'fps', 'source', 'codec:vp9.2', 'lang'))
keywords = get_first(video_details, 'keywords', expected_type=list) or []
if not keywords and webpage:
continue
process_language(
subtitles, base_url, lang_code,
- traverse_obj(caption_track, ('name', 'simpleText')),
+ traverse_obj(caption_track, ('name', 'simpleText'), ('name', 'runs', ..., 'text'), get_all=False),
{})
continue
automatic_captions = {}
needs_auth=info['age_limit'] >= 18,
is_unlisted=None if is_private is None else is_unlisted)
- # get xsrf for annotations or comments
- get_annotations = self.get_param('writeannotations', False)
- get_comments = self.get_param('getcomments', False)
- if get_annotations or get_comments:
- xsrf_token = None
- if master_ytcfg:
- xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
- if not xsrf_token:
- xsrf_token = self._search_regex(
- r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
- webpage, 'xsrf token', group='xsrf_token', fatal=False)
-
- # annotations
- if get_annotations:
- invideo_url = get_first(
- player_responses,
- ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
- expected_type=str)
- if xsrf_token and invideo_url:
- xsrf_field_name = None
- if master_ytcfg:
- xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
- if not xsrf_field_name:
- xsrf_field_name = self._search_regex(
- r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
- webpage, 'xsrf field name',
- group='xsrf_field_name', default='session_token')
- info['annotations'] = self._download_webpage(
- self._proto_relative_url(invideo_url),
- video_id, note='Downloading annotations',
- errnote='Unable to download video annotations', fatal=False,
- data=urlencode_postdata({xsrf_field_name: xsrf_token}))
-
- if get_comments:
+ if self.get_param('getcomments', False):
info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
self.mark_watched(video_id, player_responses)
# YouTube sometimes provides a button to reload playlist with unavailable videos.
if 'no-youtube-unavailable-videos' not in compat_opts:
data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
- self._extract_and_report_alerts(data)
+ self._extract_and_report_alerts(data, only_once=True)
tabs = try_get(
data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
if tabs:
return cls._VALID_URL
def _real_extract(self, url):
- qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ qs = parse_qs(url)
query = (qs.get('search_query') or qs.get('q'))[0]
self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
return self._get_n_results(query, self._MAX_RESULTS)