X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/8776349ef6b1f644584a92dfa00a05208a48edc4..61edf57f8f13f6dfd81154174e647eb5fdd26089:/yt_dlp/extractor/vk.py diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py index 575369028..6ccc701a2 100644 --- a/yt_dlp/extractor/vk.py +++ b/yt_dlp/extractor/vk.py @@ -20,6 +20,7 @@ parse_resolution, str_or_none, str_to_int, + traverse_obj, try_call, unescapeHTML, unified_timestamp, @@ -27,7 +28,6 @@ url_or_none, urlencode_postdata, urljoin, - traverse_obj, ) @@ -36,7 +36,7 @@ class VKBaseIE(InfoExtractor): def _download_webpage_handle(self, url_or_request, video_id, *args, fatal=True, **kwargs): response = super()._download_webpage_handle(url_or_request, video_id, *args, fatal=fatal, **kwargs) - challenge_url, cookie = response[1].geturl() if response else '', None + challenge_url, cookie = response[1].url if response else '', None if challenge_url.startswith('https://vk.com/429.html?'): cookie = self._get_cookies(challenge_url).get('hash429') if not cookie: @@ -97,12 +97,12 @@ class VKIE(VKBaseIE): (?: (?: (?:(?:m|new)\.)?vk\.com/video_| - (?:www\.)?daxab.com/ + (?:www\.)?daxab\.com/ ) ext\.php\?(?P.*?\boid=(?P-?\d+).*?\bid=(?P\d+).*)| (?: (?:(?:m|new)\.)?vk\.com/(?:.+?\?.*?z=)?(?:video|clip)| - (?:www\.)?daxab.com/embed/ + (?:www\.)?daxab\.com/embed/ ) (?P-?\d+_\d+)(?:.*\blist=(?P([\da-f]+)|(ln-[\da-zA-Z]+)))? ) @@ -140,7 +140,7 @@ class VKIE(VKBaseIE): 'comment_count': int, 'like_count': int, 'thumbnail': r're:https?://.+(?:\.jpg|getVideoPreview.*)$', - } + }, }, { 'note': 'Embedded video', @@ -220,7 +220,7 @@ class VKIE(VKBaseIE): 'like_count': int, 'view_count': int, 'thumbnail': r're:https?://.+x1080$', - 'tags': list + 'tags': list, }, }, { @@ -335,7 +335,7 @@ def _real_extract(self, url): mv_data = opts.get('mvData') or {} player = opts.get('player') or {} else: - video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id')) + video_id = '{}_{}'.format(mobj.group('oid'), mobj.group('id')) info_page = self._download_webpage( 'http://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id) @@ -451,6 +451,7 @@ def _real_extract(self, url): info_page, 'view count', default=None)) formats = [] + subtitles = {} for format_id, format_url in data.items(): format_url = url_or_none(format_url) if not format_url or not format_url.startswith(('http', '//', 'rtmp')): @@ -462,12 +463,21 @@ def _real_extract(self, url): formats.append({ 'format_id': format_id, 'url': format_url, + 'ext': 'mp4', + 'source_preference': 1, 'height': height, }) - elif format_id == 'hls': - formats.extend(self._extract_m3u8_formats( + elif format_id.startswith('hls') and format_id != 'hls_live_playback': + fmts, subs = self._extract_m3u8_formats_and_subtitles( format_url, video_id, 'mp4', 'm3u8_native', - m3u8_id=format_id, fatal=False, live=is_live)) + m3u8_id=format_id, fatal=False, live=is_live) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + elif format_id.startswith('dash') and format_id not in ('dash_live_playback', 'dash_uni'): + fmts, subs = self._extract_mpd_formats_and_subtitles( + format_url, video_id, mpd_id=format_id, fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) elif format_id == 'rtmp': formats.append({ 'format_id': format_id, @@ -475,7 +485,6 @@ def _real_extract(self, url): 'ext': 'flv', }) - subtitles = {} for sub in data.get('subs') or {}: subtitles.setdefault(sub.get('lang', 'en'), []).append({ 'ext': sub.get('title', '.srt').split('.')[-1], @@ -496,6 +505,7 @@ def _real_extract(self, url): 'comment_count': int_or_none(mv_data.get('commcount')), 'is_live': is_live, 'subtitles': subtitles, + '_format_sort_fields': ('res', 'source'), } @@ -520,7 +530,7 @@ class VKUserVideosIE(VKBaseIE): 'url': 'https://vk.com/video/playlist/-174476437_2', 'info_dict': { 'id': '-174476437_playlist_2', - 'title': 'Анонсы' + 'title': 'Анонсы', }, 'playlist_mincount': 108, }] @@ -570,7 +580,7 @@ def _real_extract(self, url): section = 'all' playlist_title = clean_html(get_element_by_class('VideoInfoPanel__title', webpage)) - return self.playlist_result(self._entries(page_id, section), '%s_%s' % (page_id, section), playlist_title) + return self.playlist_result(self._entries(page_id, section), f'{page_id}_{section}', playlist_title) class VKWallPostIE(VKBaseIE): @@ -707,6 +717,7 @@ def _real_extract(self, url): class VKPlayBaseIE(InfoExtractor): + _BASE_URL_RE = r'https?://(?:vkplay\.live|live\.vkplay\.ru)/' _RESOLUTIONS = { 'tiny': '256x144', 'lowest': '426x240', @@ -765,7 +776,7 @@ def _extract_common_meta(self, stream_info): class VKPlayIE(VKPlayBaseIE): - _VALID_URL = r'https?://vkplay\.live/(?P[^/]+)/record/(?P[a-f0-9\-]+)' + _VALID_URL = rf'{VKPlayBaseIE._BASE_URL_RE}(?P[^/#?]+)/record/(?P[\da-f-]+)' _TESTS = [{ 'url': 'https://vkplay.live/zitsmann/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da', 'info_dict': { @@ -776,13 +787,16 @@ class VKPlayIE(VKPlayBaseIE): 'uploader_id': '13159830', 'release_timestamp': 1683461378, 'release_date': '20230507', - 'thumbnail': r're:https://images.vkplay.live/public_video_stream/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da/preview\?change_time=\d+', + 'thumbnail': r're:https://[^/]+/public_video_stream/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da/preview', 'duration': 10608, 'view_count': int, 'like_count': int, 'categories': ['Atomic Heart'], }, 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://live.vkplay.ru/lebwa/record/33a4e4ce-e3ef-49db-bb14-f006cc6fabc9/records', + 'only_matching': True, }] def _real_extract(self, url): @@ -802,7 +816,7 @@ def _real_extract(self, url): class VKPlayLiveIE(VKPlayBaseIE): - _VALID_URL = r'https?://vkplay\.live/(?P[^/]+)/?(?:[#?]|$)' + _VALID_URL = rf'{VKPlayBaseIE._BASE_URL_RE}(?P[^/#?]+)/?(?:[#?]|$)' _TESTS = [{ 'url': 'https://vkplay.live/bayda', 'info_dict': { @@ -810,10 +824,10 @@ class VKPlayLiveIE(VKPlayBaseIE): 'ext': 'mp4', 'title': r're:эскапизм крута .*', 'uploader': 'Bayda', - 'uploader_id': 12279401, + 'uploader_id': '12279401', 'release_timestamp': 1687209962, 'release_date': '20230619', - 'thumbnail': r're:https://images.vkplay.live/public_video_stream/12279401/preview\?change_time=\d+', + 'thumbnail': r're:https://[^/]+/public_video_stream/12279401/preview', 'view_count': int, 'concurrent_view_count': int, 'like_count': int, @@ -822,6 +836,9 @@ class VKPlayLiveIE(VKPlayBaseIE): }, 'skip': 'livestream', 'params': {'skip_download': True}, + }, { + 'url': 'https://live.vkplay.ru/lebwa', + 'only_matching': True, }] def _real_extract(self, url):