[extractor] Standardize `_live_title`

[yt-dlp.git] / yt_dlp / extractor / vk.py
diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py

index 00ec006c465e4f350bdc3f1099e6d38e5e2a8b8b..5cdb1542dd23b681c5ac77e5d63dab48fc33c0fb 100644 (file)
--- a/yt_dlp/extractor/vk.py
+++ b/yt_dlp/extractor/vk.py
@@ -51,7 +51,7 @@ def _login(self):
          self._apply_first_set_cookie_header(url_handle, 'remixlhk')
  
          login_page = self._download_webpage(
-            'https://login.vk.com/?act=login', None,
+            'https://vk.com/login', None,
              note='Logging in',
              data=urlencode_postdata(login_form))
  
@@ -300,8 +300,15 @@ class VKIE(VKBaseIE):
              'only_matching': True,
          }]
  
+    @staticmethod
+    def _extract_sibnet_urls(webpage):
+        # Sibnet iframe embed format, see https://help.sibnet.ru/?sibnet_video_embed
+        return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
+            r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1',
+            webpage)]
+
      def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        mobj = self._match_valid_url(url)
          video_id = mobj.group('videoid')
  
          mv_data = {}
@@ -408,6 +415,10 @@ def _real_extract(self, url):
          if odnoklassniki_url:
              return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
  
+        sibnet_urls = self._extract_sibnet_urls(info_page)
+        if sibnet_urls:
+            return self.url_result(sibnet_urls[0])
+
          m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
          if m_opts:
              m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))
@@ -423,8 +434,6 @@ def _real_extract(self, url):
          # 2 = live
          # 3 = post live (finished live)
          is_live = data.get('live') == 2
-        if is_live:
-            title = self._live_title(title)
  
          timestamp = unified_timestamp(self._html_search_regex(
              r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
@@ -460,6 +469,13 @@ def _real_extract(self, url):
                  })
          self._sort_formats(formats)
  
+        subtitles = {}
+        for sub in data.get('subs') or {}:
+            subtitles.setdefault(sub.get('lang', 'en'), []).append({
+                'ext': sub.get('title', '.srt').split('.')[-1],
+                'url': url_or_none(sub.get('url')),
+            })
+
          return {
              'id': video_id,
              'formats': formats,
@@ -473,6 +489,7 @@ def _real_extract(self, url):
              'like_count': int_or_none(mv_data.get('likes')),
              'comment_count': int_or_none(mv_data.get('commcount')),
              'is_live': is_live,
+            'subtitles': subtitles,
          }
  
  
@@ -527,7 +544,7 @@ def _fetch_page(self, page_id, section, page):
                  'http://vk.com/video' + video_id, VKIE.ie_key(), video_id)
  
      def _real_extract(self, url):
-        page_id, section = re.match(self._VALID_URL, url).groups()
+        page_id, section = self._match_valid_url(url).groups()
          if not section:
              section = 'all'