[misc] Add `hatch`, `ruff`, `pre-commit` and improve dev docs (#7409)

[yt-dlp.git] / yt_dlp / extractor / vk.py
diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py

index 915422817a55ab1771a13a935700f803ce9442a6..132d65bcaee52bd5a43405d7aaa52535723e0929 100644 (file)
--- a/yt_dlp/extractor/vk.py
+++ b/yt_dlp/extractor/vk.py
@@ -20,6 +20,7 @@
      parse_resolution,
      str_or_none,
      str_to_int,
+    traverse_obj,
      try_call,
      unescapeHTML,
      unified_timestamp,
@@ -27,7 +28,6 @@
      url_or_none,
      urlencode_postdata,
      urljoin,
-    traverse_obj,
  )
  
  
@@ -97,12 +97,12 @@ class VKIE(VKBaseIE):
                          (?:
                              (?:
                                  (?:(?:m|new)\.)?vk\.com/video_|
-                                (?:www\.)?daxab.com/
+                                (?:www\.)?daxab\.com/
                              )
                              ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)|
                              (?:
                                  (?:(?:m|new)\.)?vk\.com/(?:.+?\?.*?z=)?(?:video|clip)|
-                                (?:www\.)?daxab.com/embed/
+                                (?:www\.)?daxab\.com/embed/
                              )
                              (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>([\da-f]+)|(ln-[\da-zA-Z]+)))?
                          )
@@ -451,6 +451,7 @@ def _real_extract(self, url):
              info_page, 'view count', default=None))
  
          formats = []
+        subtitles = {}
          for format_id, format_url in data.items():
              format_url = url_or_none(format_url)
              if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
@@ -462,12 +463,21 @@ def _real_extract(self, url):
                  formats.append({
                      'format_id': format_id,
                      'url': format_url,
+                    'ext': 'mp4',
+                    'source_preference': 1,
                      'height': height,
                  })
              elif format_id == 'hls':
-                formats.extend(self._extract_m3u8_formats(
+                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                      format_url, video_id, 'mp4', 'm3u8_native',
-                    m3u8_id=format_id, fatal=False, live=is_live))
+                    m3u8_id=format_id, fatal=False, live=is_live)
+                formats.extend(fmts)
+                self._merge_subtitles(subs, target=subtitles)
+            elif format_id.startswith('dash_'):
+                fmts, subs = self._extract_mpd_formats_and_subtitles(
+                    format_url, video_id, mpd_id=format_id, fatal=False)
+                formats.extend(fmts)
+                self._merge_subtitles(subs, target=subtitles)
              elif format_id == 'rtmp':
                  formats.append({
                      'format_id': format_id,
@@ -475,7 +485,6 @@ def _real_extract(self, url):
                      'ext': 'flv',
                  })
  
-        subtitles = {}
          for sub in data.get('subs') or {}:
              subtitles.setdefault(sub.get('lang', 'en'), []).append({
                  'ext': sub.get('title', '.srt').split('.')[-1],
@@ -496,6 +505,7 @@ def _real_extract(self, url):
              'comment_count': int_or_none(mv_data.get('commcount')),
              'is_live': is_live,
              'subtitles': subtitles,
+            '_format_sort_fields': ('res', 'source'),
          }
  
  
@@ -707,6 +717,7 @@ def _real_extract(self, url):
  
  
  class VKPlayBaseIE(InfoExtractor):
+    _BASE_URL_RE = r'https?://(?:vkplay\.live|live\.vkplay\.ru)/'
      _RESOLUTIONS = {
          'tiny': '256x144',
          'lowest': '426x240',
@@ -765,7 +776,7 @@ def _extract_common_meta(self, stream_info):
  
  
  class VKPlayIE(VKPlayBaseIE):
-    _VALID_URL = r'https?://vkplay\.live/(?P<username>[^/#?]+)/record/(?P<id>[a-f0-9-]+)'
+    _VALID_URL = rf'{VKPlayBaseIE._BASE_URL_RE}(?P<username>[^/#?]+)/record/(?P<id>[\da-f-]+)'
      _TESTS = [{
          'url': 'https://vkplay.live/zitsmann/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da',
          'info_dict': {
@@ -776,13 +787,16 @@ class VKPlayIE(VKPlayBaseIE):
              'uploader_id': '13159830',
              'release_timestamp': 1683461378,
              'release_date': '20230507',
-            'thumbnail': r're:https://images.vkplay.live/public_video_stream/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da/preview\?change_time=\d+',
+            'thumbnail': r're:https://[^/]+/public_video_stream/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da/preview',
              'duration': 10608,
              'view_count': int,
              'like_count': int,
              'categories': ['Atomic Heart'],
          },
          'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://live.vkplay.ru/lebwa/record/33a4e4ce-e3ef-49db-bb14-f006cc6fabc9/records',
+        'only_matching': True,
      }]
  
      def _real_extract(self, url):
@@ -802,7 +816,7 @@ def _real_extract(self, url):
  
  
  class VKPlayLiveIE(VKPlayBaseIE):
-    _VALID_URL = r'https?://vkplay\.live/(?P<id>[^/#?]+)/?(?:[#?]|$)'
+    _VALID_URL = rf'{VKPlayBaseIE._BASE_URL_RE}(?P<id>[^/#?]+)/?(?:[#?]|$)'
      _TESTS = [{
          'url': 'https://vkplay.live/bayda',
          'info_dict': {
@@ -810,10 +824,10 @@ class VKPlayLiveIE(VKPlayBaseIE):
              'ext': 'mp4',
              'title': r're:эскапизм крута .*',
              'uploader': 'Bayda',
-            'uploader_id': 12279401,
+            'uploader_id': '12279401',
              'release_timestamp': 1687209962,
              'release_date': '20230619',
-            'thumbnail': r're:https://images.vkplay.live/public_video_stream/12279401/preview\?change_time=\d+',
+            'thumbnail': r're:https://[^/]+/public_video_stream/12279401/preview',
              'view_count': int,
              'concurrent_view_count': int,
              'like_count': int,
@@ -822,6 +836,9 @@ class VKPlayLiveIE(VKPlayBaseIE):
          },
          'skip': 'livestream',
          'params': {'skip_download': True},
+    }, {
+        'url': 'https://live.vkplay.ru/lebwa',
+        'only_matching': True,
      }]
  
      def _real_extract(self, url):