X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/f9934b96145af8ac5dfdcbf684827aeaea9912a7..61edf57f8f13f6dfd81154174e647eb5fdd26089:/yt_dlp/extractor/vimeo.py diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index a00b387f3..7e79032f2 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -1,40 +1,33 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import functools -import re import itertools +import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_HTTPError, - compat_str, - compat_urlparse, -) +from ..networking import HEADRequest, Request +from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, + OnDemandPagedList, clean_html, determine_ext, - ExtractorError, get_element_by_class, - HEADRequest, - js_to_json, int_or_none, + js_to_json, merge_dicts, - OnDemandPagedList, parse_filesize, parse_iso8601, parse_qs, - sanitized_Request, smuggle_url, str_or_none, + traverse_obj, try_get, unified_timestamp, unsmuggle_url, urlencode_postdata, - urljoin, - unescapeHTML, urlhandle_detect_ext, + urljoin, ) @@ -43,18 +36,28 @@ class VimeoBaseInfoExtractor(InfoExtractor): _LOGIN_REQUIRED = False _LOGIN_URL = 'https://vimeo.com/log_in' + @staticmethod + def _smuggle_referrer(url, referrer_url): + return smuggle_url(url, {'referer': referrer_url}) + + def _unsmuggle_headers(self, url): + """@returns (url, smuggled_data, headers)""" + url, data = unsmuggle_url(url, {}) + headers = self.get_param('http_headers').copy() + if 'referer' in data: + headers['Referer'] = data['referer'] + return url, data, headers + def _perform_login(self, username, password): - webpage = self._download_webpage( - self._LOGIN_URL, None, 'Downloading login page') - token, vuid = self._extract_xsrft_and_vuid(webpage) + viewer = self._download_json('https://vimeo.com/_next/viewer', None, 'Downloading login token') data = { 'action': 'login', 'email': username, 'password': password, 'service': 'vimeo', - 'token': token, + 'token': viewer['xsrft'], } - self._set_vimeo_cookie('vuid', vuid) + self._set_vimeo_cookie('vuid', viewer['vuid']) try: self._download_webpage( self._LOGIN_URL, None, 'Logging in', @@ -63,7 +66,7 @@ def _perform_login(self, username, password): 'Referer': self._LOGIN_URL, }) except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 418: + if isinstance(e.cause, HTTPError) and e.cause.status == 418: raise ExtractorError( 'Unable to log in: bad username or password', expected=True) @@ -115,16 +118,17 @@ def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs): def _set_vimeo_cookie(self, name, value): self._set_cookie('vimeo.com', name, value) - def _vimeo_sort_formats(self, formats): - # Note: Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps - # at the same time without actual units specified. - self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source')) - def _parse_config(self, config, video_id): video_data = config['video'] - video_title = video_data['title'] + video_title = video_data.get('title') live_event = video_data.get('live_event') or {} - is_live = live_event.get('status') == 'started' + live_status = { + 'pending': 'is_upcoming', + 'active': 'is_upcoming', + 'started': 'is_live', + 'ended': 'post_live', + }.get(live_event.get('status')) + is_live = live_status == 'is_live' request = config.get('request') or {} formats = [] @@ -137,7 +141,7 @@ def _parse_config(self, config, video_id): continue formats.append({ 'url': video_url, - 'format_id': 'http-%s' % f.get('quality'), + 'format_id': 'http-{}'.format(f.get('quality')), 'source_preference': 10, 'width': int_or_none(f.get('width')), 'height': int_or_none(f.get('height')), @@ -152,19 +156,19 @@ def _parse_config(self, config, video_id): manifest_url = cdn_data.get('url') if not manifest_url: continue - format_id = '%s-%s' % (files_type, cdn_name) + format_id = f'{files_type}-{cdn_name}' sep_manifest_urls = [] if re.search(sep_pattern, manifest_url): for suffix, repl in (('', 'video'), ('_sep', 'sep/video')): sep_manifest_urls.append((format_id + suffix, re.sub( - sep_pattern, '/%s/' % repl, manifest_url))) + sep_pattern, f'/{repl}/', manifest_url))) else: sep_manifest_urls = [(format_id, manifest_url)] for f_id, m_url in sep_manifest_urls: if files_type == 'hls': fmts, subs = self._extract_m3u8_formats_and_subtitles( m_url, video_id, 'mp4', live=is_live, m3u8_id=f_id, - note='Downloading %s m3u8 information' % cdn_name, + note=f'Downloading {cdn_name} m3u8 information', fatal=False) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) @@ -175,7 +179,7 @@ def _parse_config(self, config, video_id): m_url = real_m_url fmts, subs = self._extract_mpd_formats_and_subtitles( m_url.replace('/master.json', '/master.mpd'), video_id, f_id, - 'Downloading %s MPD information' % cdn_name, + f'Downloading {cdn_name} MPD information', fatal=False) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) @@ -233,7 +237,11 @@ def _parse_config(self, config, video_id): 'chapters': chapters or None, 'formats': formats, 'subtitles': subtitles, - 'is_live': is_live, + 'live_status': live_status, + 'release_timestamp': traverse_obj(live_event, ('ingest', 'scheduled_start_time', {parse_iso8601})), + # Note: Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps + # at the same time without actual units specified. + '_format_sort_fields': ('quality', 'res', 'fps', 'hdr:12', 'source'), } def _extract_original_format(self, url, video_id, unlisted_hash=None): @@ -248,10 +256,10 @@ def _extract_original_format(self, url, video_id, unlisted_hash=None): download_url = try_get(source_file, lambda x: x['download_url']) if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'): source_name = source_file.get('public_name', 'Original') - if self._is_valid_url(download_url, video_id, '%s video' % source_name): + if self._is_valid_url(download_url, video_id, f'{source_name} video'): ext = (try_get( source_file, lambda x: x['extension'], - compat_str) or determine_ext( + str) or determine_ext( download_url, None) or 'mp4').lower() return { 'url': download_url, @@ -267,7 +275,7 @@ def _extract_original_format(self, url, video_id, unlisted_hash=None): 'https://vimeo.com/_rv/viewer', video_id, note='Downloading jwt token', fatal=False) or {} if not jwt_response.get('jwt'): return - headers = {'Authorization': 'jwt %s' % jwt_response['jwt']} + headers = {'Authorization': 'jwt {}'.format(jwt_response['jwt']), 'Accept': 'application/json'} original_response = self._download_json( f'https://api.vimeo.com/videos/{video_id}', video_id, headers=headers, fatal=False, expected_status=(403, 404)) or {} @@ -297,28 +305,42 @@ class VimeoIE(VimeoBaseInfoExtractor): # _VALID_URL matches Vimeo URLs _VALID_URL = r'''(?x) - https?:// - (?: - (?: - www| - player - ) - \. - )? - vimeo(?:pro)?\.com/ - (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/) - (?:[^/]+/)*? - (?: - (?: - play_redirect_hls| - moogaloop\.swf)\?clip_id= - )? - (?:videos?/)? - (?P[0-9]+) - (?:/(?P[\da-f]{10}))? - /?(?:[?&].*)?(?:[#].*)?$ - ''' + https?:// + (?: + (?: + www| + player + ) + \. + )? + vimeo\.com/ + (?: + (?Puser)| + (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/) + (?:.*?/)?? + (?P + (?: + play_redirect_hls| + moogaloop\.swf)\?clip_id= + )? + (?:videos?/)? + ) + (?P[0-9]+) + (?(u) + /(?!videos|likes)[^/?#]+/?| + (?(q)|/(?P[\da-f]{10}))? + ) + (?:(?(q)[&]|(?(u)|/?)[?]).*?)?(?:[#].*)?$ + ''' IE_NAME = 'vimeo' + _EMBED_REGEX = [ + # iframe + r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/\d+.*?)\1', + # Embedded (swf embed) Vimeo player + r']+?src=(["\'])(?P(?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)\1', + # Non-standard embedded Vimeo player + r']+src=(["\'])(?P(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)\1', + ] _TESTS = [ { 'url': 'http://vimeo.com/56015672#at=0', @@ -339,36 +361,11 @@ class VimeoIE(VimeoBaseInfoExtractor): 'params': { 'format': 'best[protocol=https]', }, - 'skip': 'No longer available' - }, - { - 'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876', - 'md5': '3b5ca6aa22b60dfeeadf50b72e44ed82', - 'note': 'Vimeo Pro video (#1197)', - 'info_dict': { - 'id': '68093876', - 'ext': 'mp4', - 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/openstreetmapus', - 'uploader_id': 'openstreetmapus', - 'uploader': 'OpenStreetMap US', - 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography', - 'description': 'md5:2c362968038d4499f4d79f88458590c1', - 'duration': 1595, - 'upload_date': '20130610', - 'timestamp': 1370893156, - 'license': 'by', - 'thumbnail': 'https://i.vimeocdn.com/video/440260469-19b0d92fca3bd84066623b53f1eb8aaa3980c6c809e2d67b6b39ab7b4a77a344-d_960', - 'view_count': int, - 'comment_count': int, - 'like_count': int, - }, - 'params': { - 'format': 'best[protocol=https]', - }, + 'skip': 'No longer available', }, { 'url': 'http://player.vimeo.com/video/54469442', - 'md5': 'b3e7f4d2cbb53bd7dc3bb6ff4ed5cfbd', + 'md5': '619b811a4417aa4abe78dc653becf511', 'note': 'Videos that embed the url in the player page', 'info_dict': { 'id': '54469442', @@ -378,7 +375,6 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/businessofsoftware', 'uploader_id': 'businessofsoftware', 'duration': 3610, - 'description': None, 'thumbnail': 'https://i.vimeocdn.com/video/376682406-f34043e7b766af6bef2af81366eacd6724f3fc3173179a11a97a1e26587c9529-d_1280', }, 'params': { @@ -399,8 +395,8 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader_id': 'user18948128', 'uploader': 'Jaime Marquínez Ferrándiz', 'duration': 10, - 'description': 'md5:dca3ea23adb29ee387127bc4ddfce63f', - 'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_960', + 'description': 'md5:6173f270cd0c0119f22817204b3eb86c', + 'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_1280', 'view_count': int, 'comment_count': int, 'like_count': int, @@ -417,7 +413,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'id': '75629013', 'ext': 'mp4', 'title': 'Key & Peele: Terrorist Interrogation', - 'description': 'md5:8678b246399b070816b12313e8b4eb5c', + 'description': 'md5:6173f270cd0c0119f22817204b3eb86c', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio', 'uploader_id': 'atencio', 'uploader': 'Peter Atencio', @@ -569,8 +565,8 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader_id': 'user18948128', 'uploader': 'Jaime Marquínez Ferrándiz', 'duration': 10, - 'description': 'md5:dca3ea23adb29ee387127bc4ddfce63f', - 'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_960', + 'description': 'md5:6173f270cd0c0119f22817204b3eb86c', + 'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_1280', 'view_count': int, 'comment_count': int, 'like_count': int, @@ -715,38 +711,24 @@ class VimeoIE(VimeoBaseInfoExtractor): 'params': { 'skip_download': True, }, - } + }, + { + # user playlist alias -> https://vimeo.com/258705797 + 'url': 'https://vimeo.com/user26785108/newspiritualguide', + 'only_matching': True, + }, # https://gettingthingsdone.com/workflowmap/ # vimeo embed with check-password page protected by Referer header ] - @staticmethod - def _smuggle_referrer(url, referrer_url): - return smuggle_url(url, {'http_headers': {'Referer': referrer_url}}) + @classmethod + def _extract_embed_urls(cls, url, webpage): + for embed_url in super()._extract_embed_urls(url, webpage): + yield cls._smuggle_referrer(embed_url, url) - @staticmethod - def _extract_urls(url, webpage): - urls = [] - # Look for embedded (iframe) Vimeo player - for mobj in re.finditer( - r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/\d+.*?)\1', - webpage): - urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url)) - PLAIN_EMBED_RE = ( - # Look for embedded (swf embed) Vimeo player - r']+?src=(["\'])(?P(?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)\1', - # Look more for non-standard embedded Vimeo player - r']+src=(["\'])(?P(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)\1', - ) - for embed_re in PLAIN_EMBED_RE: - for mobj in re.finditer(embed_re, webpage): - urls.append(mobj.group('url')) - return urls - - @staticmethod - def _extract_url(url, webpage): - urls = VimeoIE._extract_urls(url, webpage) - return urls[0] if urls else None + @classmethod + def _extract_url(cls, url, webpage): + return next(cls._extract_embed_urls(url, webpage), None) def _verify_player_video_password(self, url, video_id, headers): password = self._get_video_password() @@ -757,8 +739,8 @@ def _verify_player_video_password(self, url, video_id, headers): 'Content-Type': 'application/x-www-form-urlencoded', }) checked = self._download_json( - url + '/check-password', video_id, - 'Verifying the password', data=data, headers=headers) + f'{urllib.parse.urlsplit(url)._replace(query=None).geturl()}/check-password', + video_id, 'Verifying the password', data=data, headers=headers) if checked is False: raise ExtractorError('Wrong video password', expected=True) return checked @@ -766,7 +748,7 @@ def _verify_player_video_password(self, url, video_id, headers): def _extract_from_api(self, video_id, unlisted_hash=None): token = self._download_json( 'https://vimeo.com/_rv/jwt', video_id, headers={ - 'X-Requested-With': 'XMLHttpRequest' + 'X-Requested-With': 'XMLHttpRequest', })['token'] api_url = 'https://api.vimeo.com/videos/' + video_id if unlisted_hash: @@ -774,12 +756,12 @@ def _extract_from_api(self, video_id, unlisted_hash=None): video = self._download_json( api_url, video_id, headers={ 'Authorization': 'jwt ' + token, + 'Accept': 'application/json', }, query={ 'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays', }) info = self._parse_config(self._download_json( video['config_url'], video_id), video_id) - self._vimeo_sort_formats(info['formats']) get_timestamp = lambda x: parse_iso8601(video.get(x + '_time')) info.update({ 'description': video.get('description'), @@ -809,7 +791,7 @@ def _try_album_password(self, url): jwt = viewer['jwt'] album = self._download_json( 'https://api.vimeo.com/albums/' + album_id, - album_id, headers={'Authorization': 'jwt ' + jwt}, + album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'}, query={'fields': 'description,name,privacy'}) if try_get(album, lambda x: x['privacy']['view']) == 'password': password = self.get_param('videopassword') @@ -820,7 +802,7 @@ def _try_album_password(self, url): self._set_vimeo_cookie('vuid', viewer['vuid']) try: self._download_json( - 'https://vimeo.com/showcase/%s/auth' % album_id, + f'https://vimeo.com/showcase/{album_id}/auth', album_id, 'Verifying the password', data=urlencode_postdata({ 'password': password, 'token': viewer['xsrft'], @@ -828,15 +810,12 @@ def _try_album_password(self, url): 'X-Requested-With': 'XMLHttpRequest', }) except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + if isinstance(e.cause, HTTPError) and e.cause.status == 401: raise ExtractorError('Wrong password', expected=True) raise def _real_extract(self, url): - url, data = unsmuggle_url(url, {}) - headers = self.get_param('http_headers').copy() - if 'http_headers' in data: - headers.update(data['http_headers']) + url, data, headers = self._unsmuggle_headers(url) if 'Referer' not in headers: headers['Referer'] = url @@ -846,15 +825,7 @@ def _real_extract(self, url): if unlisted_hash: return self._extract_from_api(video_id, unlisted_hash) - orig_url = url - is_pro = 'vimeopro.com/' in url - if is_pro: - # some videos require portfolio_id to be present in player url - # https://github.com/ytdl-org/youtube-dl/issues/20070 - url = self._extract_url(url, self._download_webpage(url, video_id)) - if not url: - url = 'https://vimeo.com/' + video_id - elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')): + if any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')): url = 'https://vimeo.com/' + video_id self._try_album_password(url) @@ -862,10 +833,10 @@ def _real_extract(self, url): # Retrieve video webpage to extract further information webpage, urlh = self._download_webpage_handle( url, video_id, headers=headers) - redirect_url = urlh.geturl() + redirect_url = urlh.url except ExtractorError as ee: - if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403: - errmsg = ee.cause.read() + if isinstance(ee.cause, HTTPError) and ee.cause.status == 403: + errmsg = ee.cause.response.read() if b'Because of its privacy settings, this video cannot be played here' in errmsg: raise ExtractorError( 'Cannot download embed-only video without embedding ' @@ -875,14 +846,12 @@ def _real_extract(self, url): raise if '://player.vimeo.com/video/' in url: - config = self._parse_json(self._search_regex( - r'\bconfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id) + config = self._search_json( + r'\b(?:playerC|c)onfig\s*=', webpage, 'info section', video_id) if config.get('view') == 4: config = self._verify_player_video_password( redirect_url, video_id, headers) - info = self._parse_config(config, video_id) - self._vimeo_sort_formats(info['formats']) - return info + return self._parse_config(config, video_id) if re.search(r']+?id="pw_form"', webpage): video_password = self._get_video_password() @@ -895,7 +864,7 @@ def _real_extract(self, url): seed_status = vimeo_config.get('seed_status') or {} if seed_status.get('state') == 'failed': raise ExtractorError( - '%s said: %s' % (self.IE_NAME, seed_status['title']), + '{} said: {}'.format(self.IE_NAME, seed_status['title']), expected=True) cc_license = None @@ -947,7 +916,7 @@ def is_rented(): feature_id = vod.get('feature_id') if feature_id and not data.get('force_feature_id', False): return self.url_result(smuggle_url( - 'https://player.vimeo.com/player/%s' % feature_id, + f'https://player.vimeo.com/player/{feature_id}', {'force_feature_id': True}), 'Vimeo') if not video_description: @@ -958,14 +927,6 @@ def is_rented(): video_description = self._html_search_meta( ['description', 'og:description', 'twitter:description'], webpage, default=None) - if not video_description and is_pro: - orig_webpage = self._download_webpage( - orig_url, video_id, - note='Downloading webpage for description', - fatal=False) - if orig_webpage: - video_description = self._html_search_meta( - 'description', orig_webpage, default=None) if not video_description: self.report_warning('Cannot find video description') @@ -987,7 +948,7 @@ def is_rented(): info_dict_config = self._parse_config(config, video_id) formats.extend(info_dict_config['formats']) - self._vimeo_sort_formats(formats) + info_dict['_format_sort_fields'] = info_dict_config['_format_sort_fields'] json_ld = self._search_json_ld(webpage, video_id, default={}) @@ -1010,7 +971,7 @@ def is_rented(): return merge_dicts(info_dict, info_dict_config, json_ld) -class VimeoOndemandIE(VimeoIE): +class VimeoOndemandIE(VimeoIE): # XXX: Do not subclass from concrete IE IE_NAME = 'vimeo:ondemand' _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/(?:[^/]+/)?(?P[^/?#&]+)' _TESTS = [{ @@ -1090,7 +1051,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): _BASE_URL_TEMPL = 'https://vimeo.com/channels/%s' def _page_url(self, base_url, pagenum): - return '%s/videos/page:%d/' % (base_url, pagenum) + return f'{base_url}/videos/page:{pagenum}/' def _extract_list_title(self, webpage): return self._TITLE or self._html_search_regex( @@ -1101,7 +1062,7 @@ def _title_and_entries(self, list_id, base_url): page_url = self._page_url(base_url, pagenum) webpage = self._download_webpage( page_url, list_id, - 'Downloading page %s' % pagenum) + f'Downloading page {pagenum}') if pagenum == 1: yield self._extract_list_title(webpage) @@ -1113,13 +1074,13 @@ def _title_and_entries(self, list_id, base_url): if clips: for video_id, video_url, video_title in clips: yield self.url_result( - compat_urlparse.urljoin(base_url, video_url), + urllib.parse.urljoin(base_url, video_url), VimeoIE.ie_key(), video_id=video_id, video_title=video_title) # More relaxed fallback else: for video_id in re.findall(r'id=["\']clip_(\d+)', webpage): yield self.url_result( - 'https://vimeo.com/%s' % video_id, + f'https://vimeo.com/{video_id}', VimeoIE.ie_key(), video_id=video_id) if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: @@ -1135,9 +1096,9 @@ def _real_extract(self, url): return self._extract_videos(channel_id, self._BASE_URL_TEMPL % channel_id) -class VimeoUserIE(VimeoChannelIE): +class VimeoUserIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE IE_NAME = 'vimeo:user' - _VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P[^/]+)(?:/videos|[#?]|$)' + _VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P[^/]+)(?:/videos)?/?(?:$|[?#])' _TITLE_RE = r']+?class="user">([^<>]+?)' _TESTS = [{ 'url': 'https://vimeo.com/nkistudio/videos', @@ -1146,6 +1107,9 @@ class VimeoUserIE(VimeoChannelIE): 'id': 'nkistudio', }, 'playlist_mincount': 66, + }, { + 'url': 'https://vimeo.com/nkistudio/', + 'only_matching': True, }] _BASE_URL_TEMPL = 'https://vimeo.com/%s' @@ -1171,7 +1135,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor): 'playlist_count': 1, 'params': { 'videopassword': 'youtube-dl', - } + }, }] _PAGE_SIZE = 100 @@ -1186,13 +1150,15 @@ def _fetch_page(self, album_id, authorization, hashed_pass, page): query['_hashed_pass'] = hashed_pass try: videos = self._download_json( - 'https://api.vimeo.com/albums/%s/videos' % album_id, - album_id, 'Downloading page %d' % api_page, query=query, headers={ + f'https://api.vimeo.com/albums/{album_id}/videos', + album_id, f'Downloading page {api_page}', query=query, headers={ 'Authorization': 'jwt ' + authorization, + 'Accept': 'application/json', })['data'] except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: + if isinstance(e.cause, HTTPError) and e.cause.status == 400: return + raise for video in videos: link = video.get('link') if not link: @@ -1213,7 +1179,7 @@ def _real_extract(self, url): jwt = viewer['jwt'] album = self._download_json( 'https://api.vimeo.com/albums/' + album_id, - album_id, headers={'Authorization': 'jwt ' + jwt}, + album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'}, query={'fields': 'description,name,privacy'}) hashed_pass = None if try_get(album, lambda x: x['privacy']['view']) == 'password': @@ -1225,7 +1191,7 @@ def _real_extract(self, url): self._set_vimeo_cookie('vuid', viewer['vuid']) try: hashed_pass = self._download_json( - 'https://vimeo.com/showcase/%s/auth' % album_id, + f'https://vimeo.com/showcase/{album_id}/auth', album_id, 'Verifying the password', data=urlencode_postdata({ 'password': password, 'token': viewer['xsrft'], @@ -1233,7 +1199,7 @@ def _real_extract(self, url): 'X-Requested-With': 'XMLHttpRequest', })['hashed_pass'] except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + if isinstance(e.cause, HTTPError) and e.cause.status == 401: raise ExtractorError('Wrong password', expected=True) raise entries = OnDemandPagedList(functools.partial( @@ -1242,7 +1208,7 @@ def _real_extract(self, url): entries, album_id, album.get('name'), album.get('description')) -class VimeoGroupsIE(VimeoChannelIE): +class VimeoGroupsIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE IE_NAME = 'vimeo:group' _VALID_URL = r'https://vimeo\.com/groups/(?P[^/]+)(?:/(?!videos?/\d+)|$)' _TESTS = [{ @@ -1329,14 +1295,13 @@ def _real_extract(self, url): page_url + '/action', video_id) if source_format: info_dict['formats'].append(source_format) - self._vimeo_sort_formats(info_dict['formats']) info_dict['description'] = clean_html(clip_data.get('description')) return info_dict -class VimeoWatchLaterIE(VimeoChannelIE): +class VimeoWatchLaterIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE IE_NAME = 'vimeo:watchlater' - IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)' + IE_DESC = 'Vimeo watch later list, ":vimeowatchlater" keyword (requires authentication)' _VALID_URL = r'https://vimeo\.com/(?:home/)?watchlater|:vimeowatchlater' _TITLE = 'Watch Later' _LOGIN_REQUIRED = True @@ -1346,18 +1311,18 @@ class VimeoWatchLaterIE(VimeoChannelIE): }] def _page_url(self, base_url, pagenum): - url = '%s/page:%d/' % (base_url, pagenum) - request = sanitized_Request(url) + url = f'{base_url}/page:{pagenum}/' + request = Request(url) # Set the header to get a partial html page with the ids, # the normal page doesn't contain them. - request.add_header('X-Requested-With', 'XMLHttpRequest') + request.headers['X-Requested-With'] = 'XMLHttpRequest' return request def _real_extract(self, url): return self._extract_videos('watchlater', 'https://vimeo.com/watchlater') -class VimeoLikesIE(VimeoChannelIE): +class VimeoLikesIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE _VALID_URL = r'https://(?:www\.)?vimeo\.com/(?P[^/]+)/likes/?(?:$|[?#]|sort:)' IE_NAME = 'vimeo:likes' IE_DESC = 'Vimeo user likes' @@ -1374,31 +1339,117 @@ class VimeoLikesIE(VimeoChannelIE): }] def _page_url(self, base_url, pagenum): - return '%s/page:%d/' % (base_url, pagenum) + return f'{base_url}/page:{pagenum}/' def _real_extract(self, url): user_id = self._match_id(url) - return self._extract_videos(user_id, 'https://vimeo.com/%s/likes' % user_id) + return self._extract_videos(user_id, f'https://vimeo.com/{user_id}/likes') class VHXEmbedIE(VimeoBaseInfoExtractor): IE_NAME = 'vhx:embed' _VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P\d+)' + _EMBED_REGEX = [r']+src="(?Phttps?://embed\.vhx\.tv/videos/\d+[^"]*)"'] - @staticmethod - def _extract_url(webpage): - mobj = re.search( - r']+src="(https?://embed\.vhx\.tv/videos/\d+[^"]*)"', webpage) - return unescapeHTML(mobj.group(1)) if mobj else None + @classmethod + def _extract_embed_urls(cls, url, webpage): + for embed_url in super()._extract_embed_urls(url, webpage): + yield cls._smuggle_referrer(embed_url, url) def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + url, _, headers = self._unsmuggle_headers(url) + webpage = self._download_webpage(url, video_id, headers=headers) config_url = self._parse_json(self._search_regex( r'window\.OTTData\s*=\s*({.+})', webpage, 'ott data'), video_id, js_to_json)['config_url'] config = self._download_json(config_url, video_id) info = self._parse_config(config, video_id) info['id'] = video_id - self._vimeo_sort_formats(info['formats']) return info + + +class VimeoProIE(VimeoBaseInfoExtractor): + IE_NAME = 'vimeo:pro' + _VALID_URL = r'https?://(?:www\.)?vimeopro\.com/[^/?#]+/(?P[^/?#]+)(?:(?:/videos?/(?P[0-9]+)))?' + _TESTS = [{ + # Vimeo URL derived from video_id + 'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876', + 'md5': '3b5ca6aa22b60dfeeadf50b72e44ed82', + 'note': 'Vimeo Pro video (#1197)', + 'info_dict': { + 'id': '68093876', + 'ext': 'mp4', + 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/openstreetmapus', + 'uploader_id': 'openstreetmapus', + 'uploader': 'OpenStreetMap US', + 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography', + 'description': 'md5:2c362968038d4499f4d79f88458590c1', + 'duration': 1595, + 'upload_date': '20130610', + 'timestamp': 1370893156, + 'license': 'by', + 'thumbnail': 'https://i.vimeocdn.com/video/440260469-19b0d92fca3bd84066623b53f1eb8aaa3980c6c809e2d67b6b39ab7b4a77a344-d_960', + 'view_count': int, + 'comment_count': int, + 'like_count': int, + 'tags': 'count:1', + }, + 'params': { + 'format': 'best[protocol=https]', + }, + }, { + # password-protected VimeoPro page with Vimeo player embed + 'url': 'https://vimeopro.com/cadfem/simulation-conference-mechanische-systeme-in-perfektion', + 'info_dict': { + 'id': '764543723', + 'ext': 'mp4', + 'title': 'Mechanische Systeme in Perfektion: Realität erfassen, Innovation treiben', + 'thumbnail': 'https://i.vimeocdn.com/video/1543784598-a1a750494a485e601110136b9fe11e28c2131942452b3a5d30391cb3800ca8fd-d_1280', + 'description': 'md5:2a9d195cd1b0f6f79827107dc88c2420', + 'uploader': 'CADFEM', + 'uploader_id': 'cadfem', + 'uploader_url': 'https://vimeo.com/cadfem', + 'duration': 12505, + 'chapters': 'count:10', + }, + 'params': { + 'videopassword': 'Conference2022', + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + display_id, video_id = self._match_valid_url(url).group('slug', 'id') + if video_id: + display_id = video_id + webpage = self._download_webpage(url, display_id) + + password_form = self._search_regex( + r'(?is)]+?method=["\']post["\'][^>]*>(.+?password.+?)', + webpage, 'password form', default=None) + if password_form: + try: + webpage = self._download_webpage(url, display_id, data=urlencode_postdata({ + 'password': self._get_video_password(), + **self._hidden_inputs(password_form), + }), note='Logging in with video password') + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 418: + raise ExtractorError('Wrong video password', expected=True) + raise + + description = None + # even if we have video_id, some videos require player URL with portfolio_id query param + # https://github.com/ytdl-org/youtube-dl/issues/20070 + vimeo_url = VimeoIE._extract_url(url, webpage) + if vimeo_url: + description = self._html_search_meta('description', webpage, default=None) + elif video_id: + vimeo_url = f'https://vimeo.com/{video_id}' + else: + raise ExtractorError( + 'No Vimeo embed or video ID could be found in VimeoPro page', expected=True) + + return self.url_result(vimeo_url, VimeoIE, video_id, url_transparent=True, + description=description)