jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/vk.py
[ie/youtube] Suppress "Unavailable videos are hidden" warning (#10159)
[yt-dlp.git] / yt_dlp / extractor / vk.py
index c12e873623fc62d444a881325f516473d2d28609..6ccc701a2b2b714326a1c329848f6192cceb91e3 100644 (file)
@@ -20,6 +20,7 @@
     parse_resolution,
     str_or_none,
     str_to_int,
+    traverse_obj,
     try_call,
     unescapeHTML,
     unified_timestamp,
@@ -27,7 +28,6 @@
     url_or_none,
     urlencode_postdata,
     urljoin,
-    traverse_obj,
 )
 
 
@@ -140,7 +140,7 @@ class VKIE(VKBaseIE):
                 'comment_count': int,
                 'like_count': int,
                 'thumbnail': r're:https?://.+(?:\.jpg|getVideoPreview.*)$',
-            }
+            },
         },
         {
             'note': 'Embedded video',
@@ -220,7 +220,7 @@ class VKIE(VKBaseIE):
                 'like_count': int,
                 'view_count': int,
                 'thumbnail': r're:https?://.+x1080$',
-                'tags': list
+                'tags': list,
             },
         },
         {
@@ -335,7 +335,7 @@ def _real_extract(self, url):
             mv_data = opts.get('mvData') or {}
             player = opts.get('player') or {}
         else:
-            video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
+            video_id = '{}_{}'.format(mobj.group('oid'), mobj.group('id'))
 
             info_page = self._download_webpage(
                 'http://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id)
@@ -451,6 +451,7 @@ def _real_extract(self, url):
             info_page, 'view count', default=None))
 
         formats = []
+        subtitles = {}
         for format_id, format_url in data.items():
             format_url = url_or_none(format_url)
             if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
@@ -462,12 +463,21 @@ def _real_extract(self, url):
                 formats.append({
                     'format_id': format_id,
                     'url': format_url,
+                    'ext': 'mp4',
+                    'source_preference': 1,
                     'height': height,
                 })
-            elif format_id == 'hls':
-                formats.extend(self._extract_m3u8_formats(
+            elif format_id.startswith('hls') and format_id != 'hls_live_playback':
+                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                     format_url, video_id, 'mp4', 'm3u8_native',
-                    m3u8_id=format_id, fatal=False, live=is_live))
+                    m3u8_id=format_id, fatal=False, live=is_live)
+                formats.extend(fmts)
+                self._merge_subtitles(subs, target=subtitles)
+            elif format_id.startswith('dash') and format_id not in ('dash_live_playback', 'dash_uni'):
+                fmts, subs = self._extract_mpd_formats_and_subtitles(
+                    format_url, video_id, mpd_id=format_id, fatal=False)
+                formats.extend(fmts)
+                self._merge_subtitles(subs, target=subtitles)
             elif format_id == 'rtmp':
                 formats.append({
                     'format_id': format_id,
@@ -475,7 +485,6 @@ def _real_extract(self, url):
                     'ext': 'flv',
                 })
 
-        subtitles = {}
         for sub in data.get('subs') or {}:
             subtitles.setdefault(sub.get('lang', 'en'), []).append({
                 'ext': sub.get('title', '.srt').split('.')[-1],
@@ -496,6 +505,7 @@ def _real_extract(self, url):
             'comment_count': int_or_none(mv_data.get('commcount')),
             'is_live': is_live,
             'subtitles': subtitles,
+            '_format_sort_fields': ('res', 'source'),
         }
 
 
@@ -520,7 +530,7 @@ class VKUserVideosIE(VKBaseIE):
         'url': 'https://vk.com/video/playlist/-174476437_2',
         'info_dict': {
             'id': '-174476437_playlist_2',
-            'title': 'Анонсы'
+            'title': 'Анонсы',
         },
         'playlist_mincount': 108,
     }]
@@ -570,7 +580,7 @@ def _real_extract(self, url):
             section = 'all'
 
         playlist_title = clean_html(get_element_by_class('VideoInfoPanel__title', webpage))
-        return self.playlist_result(self._entries(page_id, section), '%s_%s' % (page_id, section), playlist_title)
+        return self.playlist_result(self._entries(page_id, section), f'{page_id}_{section}', playlist_title)
 
 
 class VKWallPostIE(VKBaseIE):
@@ -707,6 +717,7 @@ def _real_extract(self, url):
 
 
 class VKPlayBaseIE(InfoExtractor):
+    _BASE_URL_RE = r'https?://(?:vkplay\.live|live\.vkplay\.ru)/'
     _RESOLUTIONS = {
         'tiny': '256x144',
         'lowest': '426x240',
@@ -765,7 +776,7 @@ def _extract_common_meta(self, stream_info):
 
 
 class VKPlayIE(VKPlayBaseIE):
-    _VALID_URL = r'https?://vkplay\.live/(?P<username>[^/#?]+)/record/(?P<id>[a-f0-9-]+)'
+    _VALID_URL = rf'{VKPlayBaseIE._BASE_URL_RE}(?P<username>[^/#?]+)/record/(?P<id>[\da-f-]+)'
     _TESTS = [{
         'url': 'https://vkplay.live/zitsmann/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da',
         'info_dict': {
@@ -776,13 +787,16 @@ class VKPlayIE(VKPlayBaseIE):
             'uploader_id': '13159830',
             'release_timestamp': 1683461378,
             'release_date': '20230507',
-            'thumbnail': r're:https://images.vkplay.live/public_video_stream/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da/preview\?change_time=\d+',
+            'thumbnail': r're:https://[^/]+/public_video_stream/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da/preview',
             'duration': 10608,
             'view_count': int,
             'like_count': int,
             'categories': ['Atomic Heart'],
         },
         'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://live.vkplay.ru/lebwa/record/33a4e4ce-e3ef-49db-bb14-f006cc6fabc9/records',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -802,7 +816,7 @@ def _real_extract(self, url):
 
 
 class VKPlayLiveIE(VKPlayBaseIE):
-    _VALID_URL = r'https?://vkplay\.live/(?P<id>[^/#?]+)/?(?:[#?]|$)'
+    _VALID_URL = rf'{VKPlayBaseIE._BASE_URL_RE}(?P<id>[^/#?]+)/?(?:[#?]|$)'
     _TESTS = [{
         'url': 'https://vkplay.live/bayda',
         'info_dict': {
@@ -810,10 +824,10 @@ class VKPlayLiveIE(VKPlayBaseIE):
             'ext': 'mp4',
             'title': r're:эскапизм крута .*',
             'uploader': 'Bayda',
-            'uploader_id': 12279401,
+            'uploader_id': '12279401',
             'release_timestamp': 1687209962,
             'release_date': '20230619',
-            'thumbnail': r're:https://images.vkplay.live/public_video_stream/12279401/preview\?change_time=\d+',
+            'thumbnail': r're:https://[^/]+/public_video_stream/12279401/preview',
             'view_count': int,
             'concurrent_view_count': int,
             'like_count': int,
@@ -822,6 +836,9 @@ class VKPlayLiveIE(VKPlayBaseIE):
         },
         'skip': 'livestream',
         'params': {'skip_download': True},
+    }, {
+        'url': 'https://live.vkplay.ru/lebwa',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):