X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/bfd973ece3369c593b5e82a88cc16de80088a73e..3ce2933693b66e5e8948352609c8258d8d2cec15:/yt_dlp/extractor/youtube.py diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index f20b7321a..12634483e 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -28,7 +28,6 @@ clean_html, datetime_from_str, dict_get, - error_to_compat_str, float_or_none, format_field, get_first, @@ -45,7 +44,6 @@ parse_iso8601, parse_qs, qualities, - remove_end, remove_start, smuggle_url, str_or_none, @@ -70,7 +68,7 @@ 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'WEB', - 'clientVersion': '2.20211221.00.00', + 'clientVersion': '2.20220801.00.00', } }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 1 @@ -80,7 +78,7 @@ 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'WEB_EMBEDDED_PLAYER', - 'clientVersion': '1.20211215.00.01', + 'clientVersion': '1.20220731.00.00', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 56 @@ -91,7 +89,7 @@ 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'WEB_REMIX', - 'clientVersion': '1.20211213.00.00', + 'clientVersion': '1.20220727.01.00', } }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 67, @@ -101,7 +99,7 @@ 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'WEB_CREATOR', - 'clientVersion': '1.20211220.02.00', + 'clientVersion': '1.20220726.00.00', } }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 62, @@ -111,7 +109,8 @@ 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'ANDROID', - 'clientVersion': '16.49', + 'clientVersion': '17.29.34', + 'androidSdkVersion': 30 } }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 3, @@ -122,7 +121,8 @@ 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'ANDROID_EMBEDDED_PLAYER', - 'clientVersion': '16.49', + 'clientVersion': '17.29.34', + 'androidSdkVersion': 30 }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 55, @@ -133,7 +133,8 @@ 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'ANDROID_MUSIC', - 'clientVersion': '4.57', + 'clientVersion': '5.16.51', + 'androidSdkVersion': 30 } }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 21, @@ -144,7 +145,8 @@ 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'ANDROID_CREATOR', - 'clientVersion': '21.47', + 'clientVersion': '22.28.100', + 'androidSdkVersion': 30 }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 14, @@ -157,7 +159,7 @@ 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'IOS', - 'clientVersion': '16.46', + 'clientVersion': '17.30.1', 'deviceModel': 'iPhone14,3', } }, @@ -168,7 +170,7 @@ 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'IOS_MESSAGES_EXTENSION', - 'clientVersion': '16.46', + 'clientVersion': '17.30.1', 'deviceModel': 'iPhone14,3', }, }, @@ -180,7 +182,7 @@ 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'IOS_MUSIC', - 'clientVersion': '4.57', + 'clientVersion': '5.18', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 26, @@ -190,7 +192,7 @@ 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'IOS_CREATOR', - 'clientVersion': '21.47', + 'clientVersion': '22.29.101', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 15, @@ -203,7 +205,7 @@ 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'MWEB', - 'clientVersion': '2.20211221.01.00', + 'clientVersion': '2.20220801.00.00', } }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 2 @@ -763,74 +765,54 @@ def _extract_time_text(self, renderer, *path_list): def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None, ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None, default_client='web'): - response = None - last_error = None - count = -1 - retries = self.get_param('extractor_retries', 3) - if check_get_keys is None: - check_get_keys = [] - while count < retries: - count += 1 - if last_error: - self.report_warning('%s. Retrying ...' % remove_end(last_error, '.')) + for retry in self.RetryManager(): try: response = self._call_api( ep=ep, fatal=True, headers=headers, - video_id=item_id, query=query, + video_id=item_id, query=query, note=note, context=self._extract_context(ytcfg, default_client), api_key=self._extract_api_key(ytcfg, default_client), - api_hostname=api_hostname, default_client=default_client, - note='%s%s' % (note, ' (retry #%d)' % count if count else '')) + api_hostname=api_hostname, default_client=default_client) except ExtractorError as e: - if isinstance(e.cause, network_exceptions): - if isinstance(e.cause, urllib.error.HTTPError): - first_bytes = e.cause.read(512) - if not is_html(first_bytes): - yt_error = try_get( - self._parse_json( - self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False), - lambda x: x['error']['message'], str) - if yt_error: - self._report_alerts([('ERROR', yt_error)], fatal=False) - # Downloading page may result in intermittent 5xx HTTP error - # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289 - # We also want to catch all other network exceptions since errors in later pages can be troublesome - # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210 - if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429): - last_error = error_to_compat_str(e.cause or e.msg) - if count < retries: - continue - if fatal: - raise - else: - self.report_warning(error_to_compat_str(e)) - return + if not isinstance(e.cause, network_exceptions): + return self._error_or_warning(e, fatal=fatal) + elif not isinstance(e.cause, urllib.error.HTTPError): + retry.error = e + continue - else: - try: - self._extract_and_report_alerts(response, only_once=True) - except ExtractorError as e: - # YouTube servers may return errors we want to retry on in a 200 OK response - # See: https://github.com/yt-dlp/yt-dlp/issues/839 - if 'unknown error' in e.msg.lower(): - last_error = e.msg - continue - if fatal: - raise - self.report_warning(error_to_compat_str(e)) - return - if not check_get_keys or dict_get(response, check_get_keys): - break - # Youtube sometimes sends incomplete data - # See: https://github.com/ytdl-org/youtube-dl/issues/28194 - last_error = 'Incomplete data received' - if count >= retries: - if fatal: - raise ExtractorError(last_error) - else: - self.report_warning(last_error) - return - return response + first_bytes = e.cause.read(512) + if not is_html(first_bytes): + yt_error = try_get( + self._parse_json( + self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False), + lambda x: x['error']['message'], str) + if yt_error: + self._report_alerts([('ERROR', yt_error)], fatal=False) + # Downloading page may result in intermittent 5xx HTTP error + # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289 + # We also want to catch all other network exceptions since errors in later pages can be troublesome + # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210 + if e.cause.code not in (403, 429): + retry.error = e + continue + return self._error_or_warning(e, fatal=fatal) + + try: + self._extract_and_report_alerts(response, only_once=True) + except ExtractorError as e: + # YouTube servers may return errors we want to retry on in a 200 OK response + # See: https://github.com/yt-dlp/yt-dlp/issues/839 + if 'unknown error' in e.msg.lower(): + retry.error = e + continue + return self._error_or_warning(e, fatal=fatal) + # Youtube sometimes sends incomplete data + # See: https://github.com/ytdl-org/youtube-dl/issues/28194 + if not traverse_obj(response, *variadic(check_get_keys)): + retry.error = ExtractorError('Incomplete data received', expected=True) + continue + + return response @staticmethod def is_music_url(url): @@ -2276,6 +2258,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'tags': [], 'uploader_url': 'http://www.youtube.com/user/nao20010128nao', } + }, { + 'note': '6 channel audio', + 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo', + 'only_matching': True, } ] @@ -2667,7 +2653,8 @@ def _extract_n_function(self, video_id, player_url): if self.get_param('youtube_print_sig_code'): self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n') - return lambda s: jsi.extract_function_from_code(*func_code)([s]) + func = jsi.extract_function_from_code(*func_code) + return lambda s: func([s]) def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False): """ @@ -3155,7 +3142,14 @@ def append_client(*client_names): continue if pr: - prs.append(pr) + # YouTube may return a different video player response than expected. + # See: https://github.com/TeamNewPipe/NewPipe/issues/8713 + pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId')) + if pr_video_id and pr_video_id != video_id: + self.report_warning( + f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message()) + else: + prs.append(pr) # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated: @@ -3174,7 +3168,7 @@ def append_client(*client_names): def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, duration): itags, stream_ids = {}, [] - itag_qualities, res_qualities = {}, {} + itag_qualities, res_qualities = {}, {0: -1} q = qualities([ # Normally tiny is the smallest video-only formats. But # audio-only formats with unknown quality may get tagged as tiny @@ -3253,9 +3247,9 @@ def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, i else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10 else -1) # Some formats may have much smaller duration than others (possibly damaged during encoding) - # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823 + # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823 # Make sure to avoid false positives with small duration differences. - # Eg: __2ABJjxzNo, ySuUZEjARPY + # E.g. __2ABJjxzNo, ySuUZEjARPY is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500) if is_damaged: self.report_warning( @@ -3268,10 +3262,13 @@ def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, i '%s%s' % (audio_track.get('displayName') or '', ' (default)' if language_preference > 0 else ''), fmt.get('qualityLabel') or quality.replace('audio_quality_', ''), + try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()), + try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()), throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '), # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372 'source_preference': -10 if throttled else -5 if itag == '22' else -1, 'fps': int_or_none(fmt.get('fps')) or None, + 'audio_channels': fmt.get('audioChannels'), 'height': height, 'quality': q(quality), 'has_drm': bool(fmt.get('drmFamilies')), @@ -3323,10 +3320,9 @@ def process_manifest_format(f, proto, itag): f['format_id'] = itag itags[itag] = proto - f['quality'] = next(( - q(qdict[val]) - for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities)) - if val in qdict), -1) + f['quality'] = itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1) + if f['quality'] == -1 and f.get('height'): + f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))]) return True subtitles = {} @@ -3592,7 +3588,8 @@ def feed_entry(name): formats.extend(self._extract_storyboard(player_responses, duration)) # source_preference is lower for throttled/potentially damaged formats - self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto')) + self._sort_formats(formats, ( + 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto')) info = { 'id': video_id, @@ -4522,48 +4519,30 @@ def skip_webpage(self): return 'webpage' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) def _extract_webpage(self, url, item_id, fatal=True): - retries = self.get_param('extractor_retries', 3) - count = -1 - webpage = data = last_error = None - while count < retries: - count += 1 - # Sometimes youtube returns a webpage with incomplete ytInitialData - # See: https://github.com/yt-dlp/yt-dlp/issues/116 - if last_error: - self.report_warning('%s. Retrying ...' % last_error) + webpage, data = None, None + for retry in self.RetryManager(fatal=fatal): try: - webpage = self._download_webpage( - url, item_id, - note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',)) + webpage = self._download_webpage(url, item_id, note='Downloading webpage') data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {} except ExtractorError as e: if isinstance(e.cause, network_exceptions): if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429): - last_error = error_to_compat_str(e.cause or e.msg) - if count < retries: - continue - if fatal: - raise - self.report_warning(error_to_compat_str(e)) + retry.error = e + continue + self._error_or_warning(e, fatal=fatal) break - else: - try: - self._extract_and_report_alerts(data) - except ExtractorError as e: - if fatal: - raise - self.report_warning(error_to_compat_str(e)) - break - if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')): - break + try: + self._extract_and_report_alerts(data) + except ExtractorError as e: + self._error_or_warning(e, fatal=fatal) + break - last_error = 'Incomplete yt initial data received' - if count >= retries: - if fatal: - raise ExtractorError(last_error) - self.report_warning(last_error) - break + # Sometimes youtube returns a webpage with incomplete ytInitialData + # See: https://github.com/yt-dlp/yt-dlp/issues/116 + if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'): + retry.error = ExtractorError('Incomplete yt initial data received') + continue return webpage, data @@ -5854,7 +5833,7 @@ def _real_extract(self, url): class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor): - IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)' + IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs' IE_NAME = 'youtube:music:search_url' _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)' _TESTS = [{