jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/vk.py
[extractor] Standardize `_live_title`
[yt-dlp.git] / yt_dlp / extractor / vk.py
index 00ec006c465e4f350bdc3f1099e6d38e5e2a8b8b..5cdb1542dd23b681c5ac77e5d63dab48fc33c0fb 100644 (file)
@@ -51,7 +51,7 @@ def _login(self):
         self._apply_first_set_cookie_header(url_handle, 'remixlhk')
 
         login_page = self._download_webpage(
-            'https://login.vk.com/?act=login', None,
+            'https://vk.com/login', None,
             note='Logging in',
             data=urlencode_postdata(login_form))
 
@@ -300,8 +300,15 @@ class VKIE(VKBaseIE):
             'only_matching': True,
         }]
 
+    @staticmethod
+    def _extract_sibnet_urls(webpage):
+        # Sibnet <iframe> embeds (shell.php?...videoid=N), see https://help.sibnet.ru/?sibnet_video_embed
+        return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
+            r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1',
+            webpage)]  # scheme is optional in the pattern, so results may be protocol-relative ("//...")
+
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        mobj = self._match_valid_url(url)
         video_id = mobj.group('videoid')
 
         mv_data = {}
@@ -408,6 +415,10 @@ def _real_extract(self, url):
         if odnoklassniki_url:
             return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
 
+        sibnet_urls = self._extract_sibnet_urls(info_page)
+        if sibnet_urls:
+            return self.url_result(sibnet_urls[0])
+
         m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
         if m_opts:
             m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))
@@ -423,8 +434,6 @@ def _real_extract(self, url):
         # 2 = live
         # 3 = post live (finished live)
         is_live = data.get('live') == 2
-        if is_live:
-            title = self._live_title(title)
 
         timestamp = unified_timestamp(self._html_search_regex(
             r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
@@ -460,6 +469,13 @@ def _real_extract(self, url):
                 })
         self._sort_formats(formats)
 
+        subtitles = {}
+        for sub in data.get('subs') or {}:
+            subtitles.setdefault(sub.get('lang', 'en'), []).append({
+                'ext': sub.get('title', '.srt').split('.')[-1],
+                'url': url_or_none(sub.get('url')),
+            })
+
         return {
             'id': video_id,
             'formats': formats,
@@ -473,6 +489,7 @@ def _real_extract(self, url):
             'like_count': int_or_none(mv_data.get('likes')),
             'comment_count': int_or_none(mv_data.get('commcount')),
             'is_live': is_live,
+            'subtitles': subtitles,
         }
 
 
@@ -527,7 +544,7 @@ def _fetch_page(self, page_id, section, page):
                 'http://vk.com/video' + video_id, VKIE.ie_key(), video_id)
 
     def _real_extract(self, url):
-        page_id, section = re.match(self._VALID_URL, url).groups()
+        page_id, section = self._match_valid_url(url).groups()
         if not section:
             section = 'all'