X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/2eb0f72a0ea6bdbf6889efe34027a534a3473a03..3cfd000849208b58dab4f78d1486d3f24552009e:/youtube_dl/extractor/youtube.py diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 08e821362..21731188a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -703,7 +703,7 @@ def _signature_cache_id(self, example_sig): def _extract_signature_function(self, video_id, player_url, example_sig): id_m = re.match( - r'.*?-(?P[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?)?\.(?P[a-z]+)$', + r'.*?-(?P[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P[a-z]+)$', player_url) if not id_m: raise ExtractorError('Cannot identify player %r' % player_url) @@ -1074,7 +1074,7 @@ def add_dash_mpd(video_info): age_gate = False video_info = None # Try looking directly into the video webpage - mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage) + mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});ytplayer', video_webpage) if mobj: json_code = uppercase_escape(mobj.group(1)) ytplayer_config = json.loads(json_code) @@ -1107,6 +1107,17 @@ def add_dash_mpd(video_info): if not video_info: video_info = get_video_info if 'token' in get_video_info: + # Different get_video_info requests may report different results, e.g. + # some may report video unavailability, but some may serve it without + # any complaint (see https://github.com/rg3/youtube-dl/issues/7362, + # the original webpage as well as el=info and el=embedded get_video_info + # requests report video unavailability due to geo restriction while + # el=detailpage succeeds and returns valid data). This is probably + # due to YouTube measures against IP ranges of hosting providers. + # Working around by preferring the first succeeded video_info containing + # the token if no such video_info yet was found. + if 'token' not in video_info: + video_info = get_video_info break if 'token' not in video_info: if 'reason' in video_info: @@ -1332,7 +1343,7 @@ def _map_to_format_list(urlmap): player_desc = 'flash player %s' % player_version else: player_version = self._search_regex( - r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', + [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'], player_url, 'html5 player', fatal=False) player_desc = 'html5 player %s' % player_version @@ -1644,8 +1655,18 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'playlist_mincount': 91, 'info_dict': { - 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w', + 'title': 'Uploads from lex will', } + }, { + 'note': 'Age restricted channel', + # from https://www.youtube.com/user/DeusExOfficial + 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w', + 'playlist_mincount': 64, + 'info_dict': { + 'id': 'UUs0ifCMCm1icqRbqhUINa0w', + 'title': 'Uploads from Deus Ex', + }, }] def _real_extract(self, url): @@ -1666,7 +1687,7 @@ def _real_extract(self, url): 'channelId', channel_page, 'channel id', default=None) if not channel_playlist_id: channel_playlist_id = self._search_regex( - r'data-channel-external-id="([^"]+)"', + r'data-(?:channel-external-|yt)id="([^"]+)"', channel_page, 'channel id', default=None) if channel_playlist_id and channel_playlist_id.startswith('UC'): playlist_id = 'UU' + channel_playlist_id[2:]