X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/e61b2178ebf5157bda5328143bb76729ffdf377c..bc2ca1bb75d586b75d83a6f60b680ee07227ff28:/youtube_dlc/extractor/vimeo.py diff --git a/youtube_dlc/extractor/vimeo.py b/youtube_dlc/extractor/vimeo.py index 9839657ca..ecfb5f0c5 100644 --- a/youtube_dlc/extractor/vimeo.py +++ b/youtube_dlc/extractor/vimeo.py @@ -116,7 +116,8 @@ def _set_vimeo_cookie(self, name, value): def _vimeo_sort_formats(self, formats): # Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps # at the same time without actual units specified. This lead to wrong sorting. - self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'tbr', 'format_id')) + # But since yt-dlp prefers 'res,fps' anyway, 'field_preference' is not needed + self._sort_formats(formats) def _parse_config(self, config, video_id): video_data = config['video'] @@ -178,15 +179,9 @@ def _parse_config(self, config, video_id): formats.append({ 'format_id': 'live-archive-source', 'url': live_archive_source_url, - 'preference': 1, + 'quality': 10, }) - for f in formats: - if f.get('vcodec') == 'none': - f['preference'] = -50 - elif f.get('acodec') == 'none': - f['preference'] = -40 - subtitles = {} text_tracks = config['request'].get('text_tracks') if text_tracks: @@ -226,10 +221,12 @@ def _parse_config(self, config, video_id): 'is_live': is_live, } - def _extract_original_format(self, url, video_id): + def _extract_original_format(self, url, video_id, unlisted_hash=None): + query = {'action': 'load_download_config'} + if unlisted_hash: + query['unlisted_hash'] = unlisted_hash download_data = self._download_json( - url, video_id, fatal=False, - query={'action': 'load_download_config'}, + url, video_id, fatal=False, query=query, headers={'X-Requested-With': 'XMLHttpRequest'}) if download_data: source_file = download_data.get('source_file') @@ -249,7 +246,7 @@ def _extract_original_format(self, url, video_id): 'height': int_or_none(source_file.get('height')), 'filesize': parse_filesize(source_file.get('size')), 'format_id': source_name, - 'preference': 1, + 'quality': 1, } @@ -509,6 +506,11 @@ class VimeoIE(VimeoBaseInfoExtractor): { 'url': 'https://vimeo.com/160743502/abd0e13fb4', 'only_matching': True, + }, + { + # requires passing unlisted_hash(a52724358e) to load_download_config request + 'url': 'https://vimeo.com/392479337/a52724358e', + 'only_matching': True, } # https://gettingthingsdone.com/workflowmap/ # vimeo embed with check-password page protected by Referer header @@ -673,7 +675,8 @@ def _real_extract(self, url): if config.get('view') == 4: config = self._verify_player_video_password(redirect_url, video_id, headers) - vod = config.get('video', {}).get('vod', {}) + video = config.get('video') or {} + vod = video.get('vod') or {} def is_rented(): if '>You rented this title.<' in webpage: @@ -733,7 +736,7 @@ def is_rented(): formats = [] source_format = self._extract_original_format( - 'https://vimeo.com/' + video_id, video_id) + 'https://vimeo.com/' + video_id, video_id, video.get('unlisted_hash')) if source_format: formats.append(source_format) @@ -922,7 +925,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor): }] _PAGE_SIZE = 100 - def _fetch_page(self, album_id, authorizaion, hashed_pass, page): + def _fetch_page(self, album_id, authorization, hashed_pass, page): api_page = page + 1 query = { 'fields': 'link,uri', @@ -934,7 +937,7 @@ def _fetch_page(self, album_id, authorizaion, hashed_pass, page): videos = self._download_json( 'https://api.vimeo.com/albums/%s/videos' % album_id, album_id, 'Downloading page %d' % api_page, query=query, headers={ - 'Authorization': 'jwt ' + authorizaion, + 'Authorization': 'jwt ' + authorization, })['data'] for video in videos: link = video.get('link') @@ -946,10 +949,13 @@ def _fetch_page(self, album_id, authorizaion, hashed_pass, page): def _real_extract(self, url): album_id = self._match_id(url) - webpage = self._download_webpage(url, album_id) - viewer = self._parse_json(self._search_regex( - r'bootstrap_data\s*=\s*({.+?})', - webpage, 'bootstrap data'), album_id)['viewer'] + viewer = self._download_json( + 'https://vimeo.com/_rv/viewer', album_id, fatal=False) + if not viewer: + webpage = self._download_webpage(url, album_id) + viewer = self._parse_json(self._search_regex( + r'bootstrap_data\s*=\s*({.+?})', + webpage, 'bootstrap data'), album_id)['viewer'] jwt = viewer['jwt'] album = self._download_json( 'https://api.vimeo.com/albums/' + album_id, @@ -1116,6 +1122,12 @@ class VHXEmbedIE(VimeoBaseInfoExtractor): IE_NAME = 'vhx:embed' _VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P\d+)' + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r']+src="(https?://embed\.vhx\.tv/videos/\d+[^"]*)"', webpage) + return unescapeHTML(mobj.group(1)) if mobj else None + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) @@ -1124,5 +1136,6 @@ def _real_extract(self, url): 'ott data'), video_id, js_to_json)['config_url'] config = self._download_json(config_url, video_id) info = self._parse_config(config, video_id) + info['id'] = video_id self._vimeo_sort_formats(info['formats']) return info