Update to ytdl-commit-2dd6c6e

[yt-dlp.git] / yt_dlp / extractor / vimeo.py
diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py

index 2e36b8861a07357f9ed9a669edb9cdd7b7582a7d..d81d9c551882495d0ff704b0c979150f28b8d8fd 100644 (file)
--- a/yt_dlp/extractor/vimeo.py
+++ b/yt_dlp/extractor/vimeo.py
@@ -2,6 +2,7 @@
  import functools
  import re
  import itertools
+import urllib.error
  
  from .common import InfoExtractor
  from ..compat import (
@@ -123,11 +124,6 @@ def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs):
      def _set_vimeo_cookie(self, name, value):
          self._set_cookie('vimeo.com', name, value)
  
-    def _vimeo_sort_formats(self, formats):
-        # Note: Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
-        # at the same time without actual units specified.
-        self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source'))
-
      def _parse_config(self, config, video_id):
          video_data = config['video']
          video_title = video_data.get('title')
@@ -242,6 +238,9 @@ def _parse_config(self, config, video_id):
              'formats': formats,
              'subtitles': subtitles,
              'is_live': is_live,
+            # Note: Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
+            # at the same time without actual units specified.
+            '_format_sort_fields': ('quality', 'res', 'fps', 'hdr:12', 'source'),
          }
  
      def _extract_original_format(self, url, video_id, unlisted_hash=None):
@@ -305,27 +304,33 @@ class VimeoIE(VimeoBaseInfoExtractor):
  
      # _VALID_URL matches Vimeo URLs
      _VALID_URL = r'''(?x)
-                    https?://
-                        (?:
-                            (?:
-                                www|
-                                player
-                            )
-                            \.
-                        )?
-                        vimeo(?:pro)?\.com/
-                        (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
-                        (?:[^/]+/)*?
-                        (?:
-                            (?:
-                                play_redirect_hls|
-                                moogaloop\.swf)\?clip_id=
-                            )?
-                        (?:videos?/)?
-                        (?P<id>[0-9]+)
-                        (?:/(?P<unlisted_hash>[\da-f]{10}))?
-                        /?(?:[?&].*)?(?:[#].*)?$
-                    '''
+                     https?://
+                         (?:
+                             (?:
+                                 www|
+                                 player
+                             )
+                             \.
+                         )?
+                         vimeo\.com/
+                         (?:
+                             (?P<u>user)|
+                             (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
+                             (?:.*?/)??
+                             (?P<q>
+                                 (?:
+                                     play_redirect_hls|
+                                     moogaloop\.swf)\?clip_id=
+                             )?
+                             (?:videos?/)?
+                         )
+                         (?P<id>[0-9]+)
+                         (?(u)
+                             /(?!videos|likes)[^/?#]+/?|
+                             (?(q)|/(?P<unlisted_hash>[\da-f]{10}))?
+                         )
+                         (?:(?(q)[&]|(?(u)|/?)[?]).*?)?(?:[#].*)?$
+                 '''
      IE_NAME = 'vimeo'
      _EMBED_REGEX = [
          # iframe
@@ -357,34 +362,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
              },
              'skip': 'No longer available'
          },
-        {
-            'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876',
-            'md5': '3b5ca6aa22b60dfeeadf50b72e44ed82',
-            'note': 'Vimeo Pro video (#1197)',
-            'info_dict': {
-                'id': '68093876',
-                'ext': 'mp4',
-                'uploader_url': r're:https?://(?:www\.)?vimeo\.com/openstreetmapus',
-                'uploader_id': 'openstreetmapus',
-                'uploader': 'OpenStreetMap US',
-                'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
-                'description': 'md5:2c362968038d4499f4d79f88458590c1',
-                'duration': 1595,
-                'upload_date': '20130610',
-                'timestamp': 1370893156,
-                'license': 'by',
-                'thumbnail': 'https://i.vimeocdn.com/video/440260469-19b0d92fca3bd84066623b53f1eb8aaa3980c6c809e2d67b6b39ab7b4a77a344-d_960',
-                'view_count': int,
-                'comment_count': int,
-                'like_count': int,
-            },
-            'params': {
-                'format': 'best[protocol=https]',
-            },
-        },
          {
              'url': 'http://player.vimeo.com/video/54469442',
-            'md5': 'b3e7f4d2cbb53bd7dc3bb6ff4ed5cfbd',
+            'md5': '619b811a4417aa4abe78dc653becf511',
              'note': 'Videos that embed the url in the player page',
              'info_dict': {
                  'id': '54469442',
@@ -415,8 +395,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
                  'uploader_id': 'user18948128',
                  'uploader': 'Jaime Marquínez Ferrándiz',
                  'duration': 10,
-                'description': 'md5:dca3ea23adb29ee387127bc4ddfce63f',
-                'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_960',
+                'description': 'md5:6173f270cd0c0119f22817204b3eb86c',
+                'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_1280',
                  'view_count': int,
                  'comment_count': int,
                  'like_count': int,
@@ -433,7 +413,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
                  'id': '75629013',
                  'ext': 'mp4',
                  'title': 'Key & Peele: Terrorist Interrogation',
-                'description': 'md5:8678b246399b070816b12313e8b4eb5c',
+                'description': 'md5:6173f270cd0c0119f22817204b3eb86c',
                  'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio',
                  'uploader_id': 'atencio',
                  'uploader': 'Peter Atencio',
@@ -585,8 +565,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
                  'uploader_id': 'user18948128',
                  'uploader': 'Jaime Marquínez Ferrándiz',
                  'duration': 10,
-                'description': 'md5:dca3ea23adb29ee387127bc4ddfce63f',
-                'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_960',
+                'description': 'md5:6173f270cd0c0119f22817204b3eb86c',
+                'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_1280',
                  'view_count': int,
                  'comment_count': int,
                  'like_count': int,
@@ -731,7 +711,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
              'params': {
                  'skip_download': True,
              },
-        }
+        },
+        {
+            # user playlist alias -> https://vimeo.com/258705797
+            'url': 'https://vimeo.com/user26785108/newspiritualguide',
+            'only_matching': True,
+        },
          # https://gettingthingsdone.com/workflowmap/
          # vimeo embed with check-password page protected by Referer header
      ]
@@ -776,7 +761,6 @@ def _extract_from_api(self, video_id, unlisted_hash=None):
              })
          info = self._parse_config(self._download_json(
              video['config_url'], video_id), video_id)
-        self._vimeo_sort_formats(info['formats'])
          get_timestamp = lambda x: parse_iso8601(video.get(x + '_time'))
          info.update({
              'description': video.get('description'),
@@ -840,15 +824,7 @@ def _real_extract(self, url):
          if unlisted_hash:
              return self._extract_from_api(video_id, unlisted_hash)
  
-        orig_url = url
-        is_pro = 'vimeopro.com/' in url
-        if is_pro:
-            # some videos require portfolio_id to be present in player url
-            # https://github.com/ytdl-org/youtube-dl/issues/20070
-            url = self._extract_url(url, self._download_webpage(url, video_id))
-            if not url:
-                url = 'https://vimeo.com/' + video_id
-        elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
+        if any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
              url = 'https://vimeo.com/' + video_id
  
          self._try_album_password(url)
@@ -869,14 +845,12 @@ def _real_extract(self, url):
              raise
  
          if '://player.vimeo.com/video/' in url:
-            config = self._parse_json(self._search_regex(
-                r'\b(?:playerC|c)onfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
+            config = self._search_json(
+                r'\b(?:playerC|c)onfig\s*=', webpage, 'info section', video_id)
              if config.get('view') == 4:
                  config = self._verify_player_video_password(
                      redirect_url, video_id, headers)
-            info = self._parse_config(config, video_id)
-            self._vimeo_sort_formats(info['formats'])
-            return info
+            return self._parse_config(config, video_id)
  
          if re.search(r'<form[^>]+?id="pw_form"', webpage):
              video_password = self._get_video_password()
@@ -952,14 +926,6 @@ def is_rented():
              video_description = self._html_search_meta(
                  ['description', 'og:description', 'twitter:description'],
                  webpage, default=None)
-        if not video_description and is_pro:
-            orig_webpage = self._download_webpage(
-                orig_url, video_id,
-                note='Downloading webpage for description',
-                fatal=False)
-            if orig_webpage:
-                video_description = self._html_search_meta(
-                    'description', orig_webpage, default=None)
          if not video_description:
              self.report_warning('Cannot find video description')
  
@@ -981,7 +947,7 @@ def is_rented():
  
          info_dict_config = self._parse_config(config, video_id)
          formats.extend(info_dict_config['formats'])
-        self._vimeo_sort_formats(formats)
+        info_dict['_format_sort_fields'] = info_dict_config['_format_sort_fields']
  
          json_ld = self._search_json_ld(webpage, video_id, default={})
  
@@ -1004,7 +970,7 @@ def is_rented():
          return merge_dicts(info_dict, info_dict_config, json_ld)
  
  
-class VimeoOndemandIE(VimeoIE):
+class VimeoOndemandIE(VimeoIE):  # XXX: Do not subclass from concrete IE
      IE_NAME = 'vimeo:ondemand'
      _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/(?:[^/]+/)?(?P<id>[^/?#&]+)'
      _TESTS = [{
@@ -1129,7 +1095,7 @@ def _real_extract(self, url):
          return self._extract_videos(channel_id, self._BASE_URL_TEMPL % channel_id)
  
  
-class VimeoUserIE(VimeoChannelIE):
+class VimeoUserIE(VimeoChannelIE):  # XXX: Do not subclass from concrete IE
      IE_NAME = 'vimeo:user'
      _VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P<id>[^/]+)(?:/videos)?/?(?:$|[?#])'
      _TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
@@ -1239,7 +1205,7 @@ def _real_extract(self, url):
              entries, album_id, album.get('name'), album.get('description'))
  
  
-class VimeoGroupsIE(VimeoChannelIE):
+class VimeoGroupsIE(VimeoChannelIE):  # XXX: Do not subclass from concrete IE
      IE_NAME = 'vimeo:group'
      _VALID_URL = r'https://vimeo\.com/groups/(?P<id>[^/]+)(?:/(?!videos?/\d+)|$)'
      _TESTS = [{
@@ -1326,12 +1292,11 @@ def _real_extract(self, url):
              page_url + '/action', video_id)
          if source_format:
              info_dict['formats'].append(source_format)
-        self._vimeo_sort_formats(info_dict['formats'])
          info_dict['description'] = clean_html(clip_data.get('description'))
          return info_dict
  
  
-class VimeoWatchLaterIE(VimeoChannelIE):
+class VimeoWatchLaterIE(VimeoChannelIE):  # XXX: Do not subclass from concrete IE
      IE_NAME = 'vimeo:watchlater'
      IE_DESC = 'Vimeo watch later list, ":vimeowatchlater" keyword (requires authentication)'
      _VALID_URL = r'https://vimeo\.com/(?:home/)?watchlater|:vimeowatchlater'
@@ -1354,7 +1319,7 @@ def _real_extract(self, url):
          return self._extract_videos('watchlater', 'https://vimeo.com/watchlater')
  
  
-class VimeoLikesIE(VimeoChannelIE):
+class VimeoLikesIE(VimeoChannelIE):  # XXX: Do not subclass from concrete IE
      _VALID_URL = r'https://(?:www\.)?vimeo\.com/(?P<id>[^/]+)/likes/?(?:$|[?#]|sort:)'
      IE_NAME = 'vimeo:likes'
      IE_DESC = 'Vimeo user likes'
@@ -1398,5 +1363,90 @@ def _real_extract(self, url):
          config = self._download_json(config_url, video_id)
          info = self._parse_config(config, video_id)
          info['id'] = video_id
-        self._vimeo_sort_formats(info['formats'])
          return info
+
+
+class VimeoProIE(VimeoBaseInfoExtractor):  # Extractor for vimeopro.com pages; resolves them to a Vimeo URL and hands that to VimeoIE
+    IE_NAME = 'vimeo:pro'
+    _VALID_URL = r'https?://(?:www\.)?vimeopro\.com/[^/?#]+/(?P<slug>[^/?#]+)(?:(?:/videos?/(?P<id>[0-9]+)))?'  # 'slug' = second path segment; 'id' is optional (only for .../video(s)/<digits>)
+    _TESTS = [{
+        # Page URL includes the numeric video ID (Vimeo URL derived from video_id)
+        'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876',
+        'md5': '3b5ca6aa22b60dfeeadf50b72e44ed82',
+        'note': 'Vimeo Pro video (#1197)',
+        'info_dict': {
+            'id': '68093876',
+            'ext': 'mp4',
+            'uploader_url': r're:https?://(?:www\.)?vimeo\.com/openstreetmapus',
+            'uploader_id': 'openstreetmapus',
+            'uploader': 'OpenStreetMap US',
+            'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
+            'description': 'md5:2c362968038d4499f4d79f88458590c1',
+            'duration': 1595,
+            'upload_date': '20130610',
+            'timestamp': 1370893156,
+            'license': 'by',
+            'thumbnail': 'https://i.vimeocdn.com/video/440260469-19b0d92fca3bd84066623b53f1eb8aaa3980c6c809e2d67b6b39ab7b4a77a344-d_960',
+            'view_count': int,
+            'comment_count': int,
+            'like_count': int,
+            'tags': 'count:1',
+        },
+        'params': {
+            'format': 'best[protocol=https]',
+        },
+    }, {
+        # password-protected VimeoPro page with Vimeo player embed; the URL has no video ID and the password comes from 'videopassword'
+        'url': 'https://vimeopro.com/cadfem/simulation-conference-mechanische-systeme-in-perfektion',
+        'info_dict': {
+            'id': '764543723',
+            'ext': 'mp4',
+            'title': 'Mechanische Systeme in Perfektion: Realität erfassen, Innovation treiben',
+            'thumbnail': 'https://i.vimeocdn.com/video/1543784598-a1a750494a485e601110136b9fe11e28c2131942452b3a5d30391cb3800ca8fd-d_1280',
+            'description': 'md5:2a9d195cd1b0f6f79827107dc88c2420',
+            'uploader': 'CADFEM',
+            'uploader_id': 'cadfem',
+            'uploader_url': 'https://vimeo.com/cadfem',
+            'duration': 12505,
+            'chapters': 'count:10',
+        },
+        'params': {
+            'videopassword': 'Conference2022',
+            'skip_download': True,
+        },
+    }]
+
+    def _real_extract(self, url):  # Returns a url_transparent result for VimeoIE; raises ExtractorError on a wrong password or when no Vimeo URL can be determined
+        display_id, video_id = self._match_valid_url(url).group('slug', 'id')  # video_id is None when the URL has no /video(s)/<id> part
+        if video_id:
+            display_id = video_id  # prefer the numeric ID over the slug when one is present
+        webpage = self._download_webpage(url, display_id)
+
+        password_form = self._search_regex(  # inner HTML of a POST form that mentions "password" (case-insensitive); None if absent
+            r'(?is)<form[^>]+?method=["\']post["\'][^>]*>(.+?password.+?)</form>',
+            webpage, 'password form', default=None)
+        if password_form:
+            try:
+                webpage = self._download_webpage(url, display_id, data=urlencode_postdata({  # re-request the same URL, POSTing the password plus the form's hidden inputs
+                    'password': self._get_video_password(),
+                    **self._hidden_inputs(password_form),
+                }), note='Logging in with video password')
+            except ExtractorError as e:
+                if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 418:  # HTTP 418 is treated as the wrong-password response
+                    raise ExtractorError('Wrong video password', expected=True)
+                raise  # any other failure is re-raised unchanged
+
+        description = None
+        # even if we have video_id, some videos require player URL with portfolio_id query param
+        # https://github.com/ytdl-org/youtube-dl/issues/20070
+        vimeo_url = VimeoIE._extract_url(url, webpage)  # presumably the embedded Vimeo/player URL; falsy when none is found - confirm against VimeoIE
+        if vimeo_url:
+            description = self._html_search_meta('description', webpage, default=None)  # page-level description is only read when an embed was found
+        elif video_id:
+            vimeo_url = f'https://vimeo.com/{video_id}'  # no embed: fall back to a vimeo.com URL built from the ID in the page URL
+        else:
+            raise ExtractorError(
+                'No Vimeo embed or video ID could be found in VimeoPro page', expected=True)
+
+        return self.url_result(vimeo_url, VimeoIE, video_id, url_transparent=True,  # video_id and description may both be None here
+                               description=description)