]> jfr.im git - yt-dlp.git/blame_incremental - yt_dlp/extractor/vk.py
[ie/youtube:tab] Detect looping feeds (#6621)
[yt-dlp.git] / yt_dlp / extractor / vk.py
... / ...
CommitLineData
1import collections
2import hashlib
3import re
4
5from .common import InfoExtractor
6from .dailymotion import DailymotionIE
7from .odnoklassniki import OdnoklassnikiIE
8from .pladform import PladformIE
9from .sibnet import SibnetEmbedIE
10from .vimeo import VimeoIE
11from .youtube import YoutubeIE
12from ..utils import (
13 ExtractorError,
14 UserNotLive,
15 clean_html,
16 get_element_by_class,
17 get_element_html_by_id,
18 int_or_none,
19 join_nonempty,
20 parse_resolution,
21 str_or_none,
22 str_to_int,
23 try_call,
24 unescapeHTML,
25 unified_timestamp,
26 update_url_query,
27 url_or_none,
28 urlencode_postdata,
29 urljoin,
30 traverse_obj,
31)
32
33
class VKBaseIE(InfoExtractor):
    """Shared helpers for the vk.com extractors: WAF challenge handling, login and AJAX payload requests."""

    _NETRC_MACHINE = 'vk'

    def _download_webpage_handle(self, url_or_request, video_id, *args, fatal=True, **kwargs):
        """Download a page, transparently answering vk.com's "429" WAF challenge once.

        If the response was redirected to https://vk.com/429.html and a
        ``hash429`` cookie is present, the MD5 of that cookie is submitted as
        the ``key`` query parameter and the original request is repeated.

        Returns whatever the parent implementation returns; the caller's
        ``fatal`` flag is honoured for the repeated request as well.
        """
        response = super()._download_webpage_handle(url_or_request, video_id, *args, fatal=fatal, **kwargs)

        challenge_url = response[1].geturl() if response else ''
        if not challenge_url.startswith('https://vk.com/429.html?'):
            return response

        cookie = self._get_cookies(challenge_url).get('hash429')
        if not cookie:
            return response

        hash429 = hashlib.md5(cookie.value.encode('ascii')).hexdigest()
        self._request_webpage(
            update_url_query(challenge_url, {'key': hash429}), video_id, fatal=fatal,
            note='Resolving WAF challenge', errnote='Failed to bypass WAF challenge')
        # Propagate the caller's `fatal` instead of forcing a hard failure on the retry
        return super()._download_webpage_handle(url_or_request, video_id, *args, fatal=fatal, **kwargs)

    def _perform_login(self, username, password):
        """Log in by posting the front page's hidden form fields plus the credentials.

        Raises ExtractorError (expected) when the response contains ``onLoginFailed``.
        """
        login_page, url_handle = self._download_webpage_handle(
            'https://vk.com', None, 'Downloading login page')

        login_form = self._hidden_inputs(login_page)
        login_form.update({
            'email': username.encode('cp1251'),
            'pass': password.encode('cp1251'),
        })

        # vk serves two same remixlhk cookies in Set-Cookie header and expects
        # first one to be actually set
        self._apply_first_set_cookie_header(url_handle, 'remixlhk')

        login_page = self._download_webpage(
            'https://vk.com/login', None,
            note='Logging in',
            data=urlencode_postdata(login_form))

        if re.search(r'onLoginFailed', login_page):
            raise ExtractorError(
                'Unable to login, incorrect username and/or password', expected=True)

    def _download_payload(self, path, video_id, data, fatal=True):
        """POST ``data`` to ``https://vk.com/<path>.php`` and return the ``payload`` body.

        The response's ``payload`` is unpacked as a ``(code, payload)`` pair.
        Code '3' triggers a login-required error; code '8' raises an expected
        ExtractorError built from the first payload item.

        ``data`` is not modified. Returns None when the request fails and
        ``fatal`` is false.
        """
        endpoint = f'https://vk.com/{path}.php'
        response = self._download_json(
            # Build a new dict so the caller's mapping is left untouched
            endpoint, video_id, data=urlencode_postdata({**data, 'al': 1}), fatal=fatal,
            headers={
                'Referer': endpoint,
                'X-Requested-With': 'XMLHttpRequest',
            })
        if not response:
            # Non-fatal download failure: nothing to unpack
            return None

        code, payload = response['payload']
        if code == '3':
            self.raise_login_required()
        elif code == '8':
            raise ExtractorError(clean_html(payload[0][1:-1]), expected=True)
        return payload
89
90
class VKIE(VKBaseIE):
    """Extractor for single vk.com videos and clips, including ``video_ext.php`` embeds."""

    IE_NAME = 'vk'
    IE_DESC = 'VK'
    _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1']
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:
                                (?:(?:m|new)\.)?vk\.com/video_|
                                (?:www\.)?daxab\.com/
                            )
                            ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)|
                            (?:
                                (?:(?:m|new)\.)?vk\.com/(?:.+?\?.*?z=)?(?:video|clip)|
                                (?:www\.)?daxab\.com/embed/
                            )
                            (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>([\da-f]+)|(ln-[\da-zA-Z]+)))?
                        )
                    '''

    _TESTS = [
        {
            'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
            'info_dict': {
                'id': '-77521_162222515',
                'ext': 'mp4',
                'title': 'ProtivoGunz - Хуёвая песня',
                'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
                'uploader_id': '39545378',
                'duration': 195,
                'timestamp': 1329049880,
                'upload_date': '20120212',
                'comment_count': int,
                'like_count': int,
                'thumbnail': r're:https?://.+(?:\.jpg|getVideoPreview.*)$',
            },
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'http://vk.com/video205387401_165548505',
            'info_dict': {
                'id': '205387401_165548505',
                'ext': 'mp4',
                'title': 'No name',
                'uploader': 'Tom Cruise',
                'uploader_id': '205387401',
                'duration': 9,
                'timestamp': 1374364108,
                'upload_date': '20130720',
                'comment_count': int,
                'like_count': int,
                'thumbnail': r're:https?://.+(?:\.jpg|getVideoPreview.*)$',
            }
        },
        {
            'note': 'Embedded video',
            'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa',
            'info_dict': {
                'id': '-77521_162222515',
                'ext': 'mp4',
                'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
                'title': 'ProtivoGunz - Хуёвая песня',
                'duration': 195,
                'upload_date': '20120212',
                'timestamp': 1329049880,
                'uploader_id': '39545378',
                'thumbnail': r're:https?://.+(?:\.jpg|getVideoPreview.*)$',
            },
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'https://vk.com/video-93049196_456239755?list=ln-cBjJ7S4jYYx3ADnmDT',
            'info_dict': {
                'id': '-93049196_456239755',
                'ext': 'mp4',
                'title': '8 серия (озвучка)',
                'duration': 8383,
                'comment_count': int,
                'uploader': 'Dizi2021',
                'like_count': int,
                'timestamp': 1640162189,
                'upload_date': '20211222',
                'uploader_id': '-93049196',
                'thumbnail': r're:https?://.+(?:\.jpg|getVideoPreview.*)$',
            },
        },
        {
            'note': 'youtube embed',
            'url': 'https://vk.com/video276849682_170681728',
            'info_dict': {
                'id': 'V3K4mi0SYkc',
                'ext': 'mp4',
                'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
                'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
                'duration': 178,
                'upload_date': '20130117',
                'uploader': "Children's Joy Foundation Inc.",
                'uploader_id': 'thecjf',
                'view_count': int,
                'channel_id': 'UCgzCNQ11TmR9V97ECnhi3gw',
                'availability': 'public',
                'like_count': int,
                'live_status': 'not_live',
                'playable_in_embed': True,
                'channel': 'Children\'s Joy Foundation Inc.',
                'uploader_url': 'http://www.youtube.com/user/thecjf',
                'thumbnail': r're:https?://.+\.jpg$',
                'tags': 'count:27',
                'start_time': 0.0,
                'categories': ['Nonprofits & Activism'],
                'channel_url': 'https://www.youtube.com/channel/UCgzCNQ11TmR9V97ECnhi3gw',
                'channel_follower_count': int,
                'age_limit': 0,
            },
        },
        {
            'note': 'dailymotion embed',
            'url': 'https://vk.com/video-95168827_456239103?list=cca524a0f0d5557e16',
            'info_dict': {
                'id': 'x8gfli0',
                'ext': 'mp4',
                'title': 'md5:45410f60ccd4b2760da98cb5fc777d70',
                'description': 'md5:2e71c5c9413735cfa06cf1a166f16c84',
                'uploader': 'Movies and cinema.',
                'upload_date': '20221218',
                'uploader_id': 'x1jdavv',
                'timestamp': 1671387617,
                'age_limit': 0,
                'duration': 2918,
                'like_count': int,
                'view_count': int,
                'thumbnail': r're:https?://.+x1080$',
                'tags': list
            },
        },
        {
            'url': 'https://vk.com/clips-74006511?z=clip-74006511_456247211',
            'info_dict': {
                'id': '-74006511_456247211',
                'ext': 'mp4',
                'comment_count': int,
                'duration': 9,
                'like_count': int,
                'thumbnail': r're:https?://.+(?:\.jpg|getVideoPreview.*)$',
                'timestamp': 1664995597,
                'title': 'Clip by @madempress',
                'upload_date': '20221005',
                'uploader': 'Шальная императрица',
                'uploader_id': '-74006511',
            },
        },
        {
            # video key is extra_data not url\d+
            'url': 'http://vk.com/video-110305615_171782105',
            'md5': 'e13fcda136f99764872e739d13fac1d1',
            'info_dict': {
                'id': '-110305615_171782105',
                'ext': 'mp4',
                'title': 'S-Dance, репетиции к The way show',
                'uploader': 'THE WAY SHOW | 17 апреля',
                'uploader_id': '-110305615',
                'timestamp': 1454859345,
                'upload_date': '20160207',
            },
            'skip': 'Removed',
        },
        {
            'note': 'finished live stream, postlive_mp4',
            'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
            'info_dict': {
                'id': '-387766_456242764',
                'ext': 'mp4',
                'title': 'ИгроМир 2016 День 1 — Игромания Утром',
                'uploader': 'Игромания',
                'duration': 5239,
                'upload_date': '20160929',
                'uploader_id': '-387766',
                'timestamp': 1475137527,
                'thumbnail': r're:https?://.+\.jpg$',
                'comment_count': int,
                'like_count': int,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # live stream, hls and rtmp links, most likely already finished live
            # stream by the time you are reading this comment
            'url': 'https://vk.com/video-140332_456239111',
            'only_matching': True,
        },
        {
            # removed video, just testing that we match the pattern
            'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
            'only_matching': True,
        },
        {
            # age restricted video, requires vk account credentials
            'url': 'https://vk.com/video205387401_164765225',
            'only_matching': True,
        },
        {
            # pladform embed
            'url': 'https://vk.com/video-76116461_171554880',
            'only_matching': True,
        },
        {
            'url': 'http://new.vk.com/video205387401_165548505',
            'only_matching': True,
        },
        {
            # This video is no longer available, because its author has been blocked.
            'url': 'https://vk.com/video-10639516_456240611',
            'only_matching': True,
        },
        {
            # The video is not available in your region.
            'url': 'https://vk.com/video-51812607_171445436',
            'only_matching': True,
        },
        {
            'url': 'https://vk.com/clip30014565_456240946',
            'only_matching': True,
        }]

    def _raise_for_embed_page_error(self, info_page, video_id):
        """Raise an expected ExtractorError if the embed page carries a known error message."""
        error_message = self._html_search_regex(
            [r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
             r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'],
            info_page, 'error message', default=None)
        if error_message:
            raise ExtractorError(error_message, expected=True)

        if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
            raise ExtractorError(
                'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
                expected=True)

        ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.'

        # Page marker regex -> message template (formatted with the video id)
        ERRORS = {
            r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
            ERROR_COPYRIGHT,

            r'>The video .*? was removed from public access by request of the copyright holder.<':
            ERROR_COPYRIGHT,

            r'<!>Please log in or <':
            'Video %s is only available for registered users, '
            'use --username and --password options to provide account credentials.',

            r'<!>Unknown error':
            'Video %s does not exist.',

            r'<!>Видео временно недоступно':
            'Video %s is temporarily unavailable.',

            r'<!>Access denied':
            'Access denied to video %s.',

            r'<!>Видеозапись недоступна, так как её автор был заблокирован.':
            'Video %s is no longer available, because its author has been blocked.',

            r'<!>This video is no longer available, because its author has been blocked.':
            'Video %s is no longer available, because its author has been blocked.',

            r'<!>This video is no longer available, because it has been deleted.':
            'Video %s is no longer available, because it has been deleted.',

            r'<!>The video .+? is not available in your region.':
            'Video %s is not available in your region.',
        }

        for error_re, error_msg in ERRORS.items():
            if re.search(error_re, info_page):
                raise ExtractorError(error_msg % video_id, expected=True)

    def _real_extract(self, url):
        """Extract one video.

        Regular video/clip URLs are resolved through the ``al_video`` AJAX
        endpoint; ``video_ext.php`` URLs are resolved by downloading the embed
        page. If the page embeds a third-party player, a ``url_result``
        delegating to that extractor is returned; otherwise an info dict with
        the formats listed in the player parameters is returned.

        Raises ExtractorError for known error pages and when no player
        parameters are available.
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('videoid')

        mv_data = {}
        if video_id:
            data = {
                'act': 'show',
                'video': video_id,
            }
            # Some videos (removed?) can only be downloaded with list id specified
            list_id = mobj.group('list_id')
            if list_id:
                data['list'] = list_id

            payload = self._download_payload('al_video', video_id, data)
            info_page = payload[1]
            opts = payload[-1]
            mv_data = opts.get('mvData') or {}
            player = opts.get('player') or {}
        else:
            video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))

            info_page = self._download_webpage(
                'http://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id)

            self._raise_for_embed_page_error(info_page, video_id)

            player = self._parse_json(self._search_regex(
                r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n',
                info_page, 'player params'), video_id)

        # Third-party embeds take precedence over VK's own player data
        youtube_url = YoutubeIE._extract_url(info_page)
        if youtube_url:
            return self.url_result(youtube_url, YoutubeIE.ie_key())

        vimeo_url = VimeoIE._extract_url(url, info_page)
        if vimeo_url is not None:
            return self.url_result(vimeo_url, VimeoIE.ie_key())

        pladform_url = PladformIE._extract_url(info_page)
        if pladform_url:
            return self.url_result(pladform_url, PladformIE.ie_key())

        m_rutube = re.search(
            r'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page)
        if m_rutube is not None:
            rutube_url = self._proto_relative_url(
                m_rutube.group(1).replace('\\', ''))
            return self.url_result(rutube_url)

        dailymotion_url = next(DailymotionIE._extract_embed_urls(url, info_page), None)
        if dailymotion_url:
            return self.url_result(dailymotion_url, DailymotionIE.ie_key())

        odnoklassniki_url = OdnoklassnikiIE._extract_url(info_page)
        if odnoklassniki_url:
            return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())

        sibnet_url = next(SibnetEmbedIE._extract_embed_urls(url, info_page), None)
        if sibnet_url:
            return self.url_result(sibnet_url)

        m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
        if m_opts:
            m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))
            if m_opts_url:
                opts_url = m_opts_url.group(1)
                if opts_url.startswith('//'):
                    opts_url = 'http:' + opts_url
                return self.url_result(opts_url)

        # `player` may be an empty dict when the AJAX payload had no player
        # section; fail with a readable error instead of KeyError/IndexError
        data = traverse_obj(player, ('params', 0, {dict}))
        if not data:
            raise ExtractorError('Unable to extract player params')
        title = unescapeHTML(data['md_title'])

        # 2 = live
        # 3 = post live (finished live)
        is_live = data.get('live') == 2

        timestamp = unified_timestamp(self._html_search_regex(
            r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
            'upload date', default=None)) or int_or_none(data.get('date'))

        view_count = str_to_int(self._search_regex(
            r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
            info_page, 'view count', default=None))

        formats = []
        for format_id, format_url in data.items():
            format_url = url_or_none(format_url)
            if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
                continue
            if (format_id.startswith(('url', 'cache'))
                    or format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
                # Keys such as "url720"/"cache480" carry the height as a suffix
                height = int_or_none(self._search_regex(
                    r'^(?:url|cache)(\d+)', format_id, 'height', default=None))
                formats.append({
                    'format_id': format_id,
                    'url': format_url,
                    'height': height,
                })
            elif format_id == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id=format_id, fatal=False, live=is_live))
            elif format_id == 'rtmp':
                formats.append({
                    'format_id': format_id,
                    'url': format_url,
                    'ext': 'flv',
                })

        subtitles = {}
        for sub in data.get('subs') or []:
            sub_url = url_or_none(sub.get('url'))
            if not sub_url:
                # A subtitle entry without a usable URL cannot be downloaded
                continue
            subtitles.setdefault(sub.get('lang', 'en'), []).append({
                'ext': sub.get('title', '.srt').split('.')[-1],
                'url': sub_url,
            })

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'thumbnail': data.get('jpg'),
            'uploader': data.get('md_author'),
            'uploader_id': str_or_none(data.get('author_id') or mv_data.get('authorId')),
            'duration': int_or_none(data.get('duration') or mv_data.get('duration')),
            'timestamp': timestamp,
            'view_count': view_count,
            'like_count': int_or_none(mv_data.get('likes')),
            'comment_count': int_or_none(mv_data.get('commcount')),
            'is_live': is_live,
            'subtitles': subtitles,
        }
500
501
class VKUserVideosIE(VKBaseIE):
    """Playlist extractor for a vk.com user's/community's video sections and video playlists."""

    IE_NAME = 'vk:uservideos'
    IE_DESC = "VK - User's Videos"
    _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/video/(?:playlist/)?(?P<id>[^?$#/&]+)(?!\?.*\bz=video)(?:[/?#&](?:.*?\bsection=(?P<section>\w+))?|$)'
    _TEMPLATE_URL = 'https://vk.com/videos'
    _TESTS = [{
        'url': 'https://vk.com/video/@mobidevices',
        'info_dict': {
            'id': '-17892518_all',
        },
        'playlist_mincount': 1355,
    }, {
        'url': 'https://vk.com/video/@mobidevices?section=uploaded',
        'info_dict': {
            'id': '-17892518_uploaded',
        },
        'playlist_mincount': 182,
    }, {
        'url': 'https://vk.com/video/playlist/-174476437_2',
        'info_dict': {
            'id': '-174476437_playlist_2',
            'title': 'Анонсы'
        },
        'playlist_mincount': 108,
    }]
    _VIDEO = collections.namedtuple('Video', ['owner_id', 'id'])

    def _load_videos(self, page_id, section, offset):
        """Fetch one batch of the section's video list starting at ``offset``."""
        return self._download_payload('al_video', page_id, {
            'act': 'load_videos_silent',
            'offset': offset,
            'oid': page_id,
            'section': section,
        })[0][section]

    def _entries(self, page_id, section):
        """Yield a ``url_result`` for every video in ``section``, paging until ``total`` is reached.

        Paging also stops when a batch reports zero videos, so the generator
        cannot spin forever if the reported total is never reached.
        """
        video_list_json = self._load_videos(page_id, section, 0)
        count = video_list_json['count']
        total = video_list_json['total']

        while True:
            for video in video_list_json['list']:
                # The first two items of each row are used as (owner_id, id)
                v = self._VIDEO._make(video[:2])
                video_id = '%d_%d' % (v.owner_id, v.id)
                yield self.url_result(
                    'http://vk.com/video' + video_id, VKIE.ie_key(), video_id)
            if count >= total:
                break

            video_list_json = self._load_videos(page_id, section, count)
            new_count = video_list_json['count']
            if not new_count:
                # No progress is possible; bail out instead of re-requesting the same offset
                self.to_screen(f'{page_id}: Skipping {total - count} unavailable videos')
                break
            count += new_count

    def _real_extract(self, url):
        """Resolve the owner id and section from the URL/page and return the playlist.

        ``@name`` URLs read the owner id from the page's ``data-owner-id``
        attribute; ``<owner>_<n>`` URLs are treated as playlist ``n`` of
        ``owner``. Any other id raises an expected ExtractorError.
        """
        u_id, section = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, u_id)

        if u_id.startswith('@'):
            page_id = self._search_regex(r'data-owner-id\s?=\s?"([^"]+)"', webpage, 'page_id')
        elif '_' in u_id:
            page_id, section = u_id.split('_', 1)
            section = f'playlist_{section}'
        else:
            raise ExtractorError('Invalid URL', expected=True)

        if not section:
            section = 'all'

        playlist_title = clean_html(get_element_by_class('VideoInfoPanel__title', webpage))
        return self.playlist_result(self._entries(page_id, section), '%s_%s' % (page_id, section), playlist_title)
574
575
class VKWallPostIE(VKBaseIE):
    """Playlist extractor for the audio tracks and videos attached to a vk.com wall post."""

    IE_NAME = 'vk:wallpost'
    _VALID_URL = r'https?://(?:(?:(?:(?:m|new)\.)?vk\.com/(?:[^?]+\?.*\bw=)?wall(?P<id>-?\d+_\d+)))'
    _TESTS = [{
        # public page URL, audio playlist
        'url': 'https://vk.com/bs.official?w=wall-23538238_35',
        'info_dict': {
            'id': '-23538238_35',
            'title': 'Black Shadow - Wall post -23538238_35',
            'description': 'md5:190c78f905a53e0de793d83933c6e67f',
        },
        'playlist': [{
            'md5': '5ba93864ec5b85f7ce19a9af4af080f6',
            'info_dict': {
                'id': '135220665_111806521',
                'ext': 'm4a',
                'title': 'Black Shadow - Слепое Верование',
                'duration': 370,
                'uploader': 'Black Shadow',
                'artist': 'Black Shadow',
                'track': 'Слепое Верование',
            },
        }, {
            'md5': '4cc7e804579122b17ea95af7834c9233',
            'info_dict': {
                'id': '135220665_111802303',
                'ext': 'm4a',
                'title': 'Black Shadow - Война - Негасимое Бездны Пламя!',
                'duration': 423,
                'uploader': 'Black Shadow',
                'artist': 'Black Shadow',
                'track': 'Война - Негасимое Бездны Пламя!',
            },
        }],
        'params': {
            'skip_download': True,
        },
    }, {
        # single YouTube embed with irrelevant reaction videos
        'url': 'https://vk.com/wall-32370614_7173954',
        'info_dict': {
            'id': '-32370614_7173954',
            'title': 'md5:9f93c405bbc00061d34007d78c75e3bc',
            'description': 'md5:953b811f26fa9f21ee5856e2ea8e68fc',
        },
        'playlist_count': 1,
    }, {
        # wall page URL
        'url': 'https://vk.com/wall-23538238_35',
        'only_matching': True,
    }, {
        # mobile wall page URL
        'url': 'https://m.vk.com/wall-23538238_35',
        'only_matching': True,
    }]
    # NOTE(review): '0' and 'O' are swapped relative to the standard base64
    # alphabet; presumably this mirrors VK's own table - do not "fix" without checking.
    _BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/='
    _AUDIO = collections.namedtuple('Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads'])

    def _decode(self, enc):
        """Decode ``enc`` using the ``_BASE64_CHARS`` alphabet and return the result as a str.

        Characters are consumed in groups of four 6-bit values; every
        character except the first of each group emits one output character.
        Raises ValueError if ``enc`` contains a character outside the alphabet.
        """
        decoded = []
        e = 0
        for n, c in enumerate(enc):
            r = self._BASE64_CHARS.index(c)
            cond = n % 4
            e = 64 * e + r if cond else r
            if cond:
                # Shift amount is derived from the 1-based position, hence n + 1
                decoded.append(chr(255 & e >> (-2 * (n + 1) & 6)))
        return ''.join(decoded)

    def _unmask_url(self, mask_url, vk_id):
        """Return the real URL for a masked ``audio_api_unavailable`` URL.

        URLs that do not contain ``audio_api_unavailable`` are returned
        unchanged. Otherwise the ``extra`` query value is decoded and its
        characters are permuted using a sequence seeded by ``vk_id``.
        """
        if 'audio_api_unavailable' in mask_url:
            extra = mask_url.split('?extra=')[1].split('#')
            func, base = self._decode(extra[1]).split(chr(11))
            mask_url = list(self._decode(extra[0]))
            url_len = len(mask_url)
            indexes = [None] * url_len
            index = int(base) ^ vk_id
            for n in range(url_len - 1, -1, -1):
                index = (url_len * (n + 1) ^ index + n) % url_len
                indexes[n] = index
            for n in range(1, url_len):
                c = mask_url[n]
                index = indexes[url_len - 1 - n]
                mask_url[n] = mask_url[index]
                mask_url[index] = c
            mask_url = ''.join(mask_url)
        return mask_url

    def _real_extract(self, url):
        """Return a playlist of the post's audio tracks followed by its linked VK videos."""
        post_id = self._match_id(url)

        webpage = self._download_payload('wkview', post_id, {
            'act': 'show',
            'w': 'wall' + post_id,
        })[1]

        uploader = clean_html(get_element_by_class('PostHeaderTitle__authorName', webpage))

        entries = []

        for audio in re.findall(r'data-audio="([^"]+)', webpage):
            audio = self._parse_json(unescapeHTML(audio), post_id)
            # Entries without a (non-empty) URL are not playable
            if not audio.get('url'):
                continue
            title = unescapeHTML(audio.get('title'))
            artist = unescapeHTML(audio.get('artist'))
            entries.append({
                'id': f'{audio["owner_id"]}_{audio["id"]}',
                'title': join_nonempty(artist, title, delim=' - '),
                'thumbnails': try_call(lambda: [{'url': u} for u in audio['coverUrl'].split(',')]),
                'duration': int_or_none(audio.get('duration')),
                'uploader': uploader,
                'artist': artist,
                'track': title,
                'formats': [{
                    'url': audio['url'],
                    'ext': 'm4a',
                    'vcodec': 'none',
                    'acodec': 'mp3',
                    'container': 'm4a_dash',
                }],
            })

        # The post body element may be absent; fall back to an empty string so
        # re.findall() is never handed None
        post_body = get_element_html_by_id('wl_post_body', webpage) or ''
        # dict.fromkeys() de-duplicates while keeping page order (set() would not)
        video_paths = dict.fromkeys(re.findall(
            r'<a[^>]+href=(?:["\'])(/video(?:-?[\d_]+)[^"\']*)', post_body))
        entries.extend(self.url_result(urljoin(url, path), VKIE) for path in video_paths)

        return self.playlist_result(
            entries, post_id, join_nonempty(uploader, f'Wall post {post_id}', delim=' - '),
            clean_html(get_element_by_class('wall_post_text', webpage)))
707
708
class VKPlayBaseIE(InfoExtractor):
    """Common helpers for the vkplay.live extractors."""

    # Maps a stream "type" label to the resolution string it stands for
    _RESOLUTIONS = {
        'tiny': '256x144',
        'lowest': '426x240',
        'low': '640x360',
        'medium': '852x480',
        'high': '1280x720',
        'full_hd': '1920x1080',
        'quad_hd': '2560x1440',
    }

    def _extract_from_initial_state(self, url, video_id, path):
        """Download ``url`` and return the dict found at ``path`` inside the inline "initial-state" JSON.

        Raises ExtractorError when nothing (or a non-dict) is found at ``path``.
        """
        page = self._download_webpage(url, video_id)
        initial_state = self._search_json(
            r'<script[^>]+\bid="initial-state"[^>]*>', page, 'initial state', video_id)
        video_info = traverse_obj(initial_state, path, expected_type=dict)
        if video_info:
            return video_info
        raise ExtractorError('Unable to extract video info from html inline initial state')

    def _extract_formats(self, stream_info, video_id):
        """Build the formats list from the ``playerUrls`` of the first ``data`` item of ``stream_info``."""
        player_urls = traverse_obj(stream_info, (
            'data', 0, 'playerUrls', lambda _, v: url_or_none(v['url']) and v['type']))

        collected = []
        for entry in player_urls:
            stream_url = entry['url']
            kind = str_or_none(entry['type'])

            if kind in ('hls', 'live_hls', 'live_playback_hls') or '.m3u8' in stream_url:
                collected.extend(self._extract_m3u8_formats(stream_url, video_id, m3u8_id=kind, fatal=False))
                continue
            if kind == 'dash':
                collected.extend(self._extract_mpd_formats(stream_url, video_id, mpd_id=kind, fatal=False))
                continue
            if kind in ('live_dash', 'live_playback_dash'):
                self.write_debug(f'Not extracting unsupported format "{kind}"')
                continue

            # Anything else is treated as a direct mp4 link named after its quality label
            direct_format = {
                'url': stream_url,
                'ext': 'mp4',
                'format_id': kind,
            }
            direct_format.update(parse_resolution(self._RESOLUTIONS.get(kind)))
            collected.append(direct_format)
        return collected

    def _extract_common_meta(self, stream_info):
        """Pull the metadata fields shared by records and live streams out of ``stream_info``."""
        # For uploader fields, 'user' is tried before ('blog', 'owner')
        owner_paths = ('user', ('blog', 'owner'))
        field_map = {
            'id': ('id', {str_or_none}),
            'title': ('title', {str}),
            'release_timestamp': ('startTime', {int_or_none}),
            'thumbnail': ('previewUrl', {url_or_none}),
            'view_count': ('count', 'views', {int_or_none}),
            'like_count': ('count', 'likes', {int_or_none}),
            'categories': ('category', 'title', {str}, {lambda x: [x] if x else None}),
            'uploader': (owner_paths, 'nick', {str}),
            'uploader_id': (owner_paths, 'id', {str_or_none}),
            'duration': ('duration', {int_or_none}),
            'is_live': ('isOnline', {bool}),
            'concurrent_view_count': ('count', 'viewers', {int_or_none}),
        }
        return traverse_obj(stream_info, field_map, get_all=False)
765
766
class VKPlayIE(VKPlayBaseIE):
    """Extractor for recorded vkplay.live streams (``/<username>/record/<id>``)."""

    _VALID_URL = r'https?://vkplay\.live/(?P<username>[^/]+)/record/(?P<id>[a-f0-9\-]+)'
    _TESTS = [{
        'url': 'https://vkplay.live/zitsmann/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da',
        'info_dict': {
            'id': 'f5e6e3b5-dc52-4d14-965d-0680dd2882da',
            'ext': 'mp4',
            'title': 'Atomic Heart (пробуем!) спасибо подписчику EKZO!',
            'uploader': 'ZitsmanN',
            'uploader_id': '13159830',
            'release_timestamp': 1683461378,
            'release_date': '20230507',
            'thumbnail': r're:https://images.vkplay.live/public_video_stream/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da/preview\?change_time=\d+',
            'duration': 10608,
            'view_count': int,
            'like_count': int,
            'categories': ['Atomic Heart'],
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Fetch the record from the API, falling back to the page's inline initial state."""
        username, video_id = self._match_valid_url(url).groups()

        api_response = self._download_json(
            f'https://api.vkplay.live/v1/blog/{username}/public_video_stream/record/{video_id}', video_id, fatal=False)
        record_info = traverse_obj(api_response, ('data', 'record', {dict}))
        if not record_info:
            # API gave nothing usable; read the same data from the HTML page instead
            record_info = self._extract_from_initial_state(url, video_id, ('record', 'currentRecord', 'data'))

        info = dict(self._extract_common_meta(record_info))
        info['id'] = video_id
        info['formats'] = self._extract_formats(record_info, video_id)
        return info
802
803
class VKPlayLiveIE(VKPlayBaseIE):
    """Extractor for a vkplay.live channel's current live stream."""

    _VALID_URL = r'https?://vkplay\.live/(?P<id>[^/]+)/?(?:[#?]|$)'
    _TESTS = [{
        'url': 'https://vkplay.live/bayda',
        'info_dict': {
            'id': 'f02c321e-427b-408d-b12f-ae34e53e0ea2',
            'ext': 'mp4',
            'title': r're:эскапизм крута .*',
            'uploader': 'Bayda',
            'uploader_id': 12279401,
            'release_timestamp': 1687209962,
            'release_date': '20230619',
            'thumbnail': r're:https://images.vkplay.live/public_video_stream/12279401/preview\?change_time=\d+',
            'view_count': int,
            'concurrent_view_count': int,
            'like_count': int,
            'categories': ['EVE Online'],
            'live_status': 'is_live',
        },
        'skip': 'livestream',
        'params': {'skip_download': True},
    }]

    def _real_extract(self, url):
        """Fetch the channel's stream info and return its metadata and formats.

        Raises UserNotLive when no formats were found and the stream info does
        not report ``isOnline`` as true.
        """
        username = self._match_id(url)

        stream_info = self._download_json(
            f'https://api.vkplay.live/v1/blog/{username}/public_video_stream', username, fatal=False)
        if not stream_info:
            # API gave nothing usable; read the same data from the HTML page instead
            stream_info = self._extract_from_initial_state(url, username, ('stream', 'stream', 'data', 'stream'))

        formats = self._extract_formats(stream_info, username)
        if not formats:
            is_online = traverse_obj(stream_info, ('isOnline', {bool}))
            if not is_online:
                raise UserNotLive(video_id=username)

        result = dict(self._extract_common_meta(stream_info))
        result['formats'] = formats
        return result
842 }