]> jfr.im git - yt-dlp.git/blame_incremental - yt_dlp/extractor/vk.py
[extractor/nosnl] Add support for /video (#5590)
[yt-dlp.git] / yt_dlp / extractor / vk.py
... / ...
CommitLineData
1import collections
2import hashlib
3import re
4
5from .common import InfoExtractor
6from .dailymotion import DailymotionIE
7from .odnoklassniki import OdnoklassnikiIE
8from .pladform import PladformIE
9from .sibnet import SibnetEmbedIE
10from .vimeo import VimeoIE
11from .youtube import YoutubeIE
12from ..compat import compat_urlparse
13from ..utils import (
14 ExtractorError,
15 clean_html,
16 get_element_by_class,
17 int_or_none,
18 orderedSet,
19 str_or_none,
20 str_to_int,
21 unescapeHTML,
22 unified_timestamp,
23 update_url_query,
24 url_or_none,
25 urlencode_postdata,
26)
27
28
class VKBaseIE(InfoExtractor):
    """Common base for the VK extractors.

    Provides three things shared by the subclasses: handling of the
    ``429.html`` challenge redirect, login with account credentials, and a
    helper for POSTing to VK's ``*.php`` AJAX endpoints.
    """

    _NETRC_MACHINE = 'vk'

    def _download_webpage_handle(self, url_or_request, video_id, *args, fatal=True, **kwargs):
        """Download a page, answering VK's ``429.html`` challenge if one is served.

        When the final URL of the response starts with
        ``https://vk.com/429.html?`` and a ``hash429`` cookie is available for
        it, the MD5 hex digest of the cookie value is sent back as the ``key``
        query parameter and the original request is then repeated.

        Returns whatever the parent implementation returns for the (possibly
        repeated) request.
        """
        response = super()._download_webpage_handle(url_or_request, video_id, *args, fatal=fatal, **kwargs)
        # A failed non-fatal download yields a falsy response; treat it as "no challenge"
        challenge_url = response[1].geturl() if response else ''
        cookie = None
        if challenge_url.startswith('https://vk.com/429.html?'):
            cookie = self._get_cookies(challenge_url).get('hash429')
        if not cookie:
            return response

        hash429 = hashlib.md5(cookie.value.encode('ascii')).hexdigest()
        self._request_webpage(
            update_url_query(challenge_url, {'key': hash429}), video_id, fatal=fatal,
            note='Resolving WAF challenge', errnote='Failed to bypass WAF challenge')
        # Honour the caller's ``fatal`` on the retry as well, so that a
        # non-fatal request never raises just because a challenge was served
        return super()._download_webpage_handle(url_or_request, video_id, *args, fatal=fatal, **kwargs)

    def _perform_login(self, username, password):
        """Log in to vk.com with the given credentials.

        Raises ExtractorError (expected) when the login response contains the
        ``onLoginFailed`` marker.
        """
        login_page, url_handle = self._download_webpage_handle(
            'https://vk.com', None, 'Downloading login page')

        login_form = self._hidden_inputs(login_page)

        login_form.update({
            'email': username.encode('cp1251'),
            'pass': password.encode('cp1251'),
        })

        # vk serves two same remixlhk cookies in Set-Cookie header and expects
        # first one to be actually set
        self._apply_first_set_cookie_header(url_handle, 'remixlhk')

        login_page = self._download_webpage(
            'https://vk.com/login', None,
            note='Logging in',
            data=urlencode_postdata(login_form))

        if 'onLoginFailed' in login_page:
            raise ExtractorError(
                'Unable to login, incorrect username and/or password', expected=True)

    def _download_payload(self, path, video_id, data, fatal=True):
        """POST ``data`` to ``https://vk.com/<path>.php`` and return the response payload.

        ``al=1`` is added to the form fields; the caller's ``data`` mapping is
        not modified. The JSON response is expected to carry a two-item
        ``payload`` of ``(code, payload)``.

        Returns the inner payload, or None when the download failed and
        ``fatal`` is false.
        Raises a login-required error for code ``'3'`` and ExtractorError
        (expected) carrying the server's message for code ``'8'``.
        """
        endpoint = f'https://vk.com/{path}.php'
        response = self._download_json(
            endpoint, video_id, data=urlencode_postdata({**data, 'al': 1}), fatal=fatal,
            headers={
                'Referer': endpoint,
                'X-Requested-With': 'XMLHttpRequest',
            })
        if not response:
            # NOTE(review): assumes a falsy result means a non-fatal download
            # failure; subscripting it would only raise an unrelated TypeError
            return None
        code, payload = response['payload']
        if code == '3':
            self.raise_login_required()
        elif code == '8':
            # The first payload item is the message wrapped in one extra
            # character on each side, which is stripped before cleaning
            raise ExtractorError(clean_html(payload[0][1:-1]), expected=True)
        return payload
84
85
class VKIE(VKBaseIE):
    """Extractor for a single VK video or clip.

    Handles regular ``vk.com/video<owner>_<id>`` and ``clip`` URLs (also when
    passed through the ``z=`` query parameter), ``video_ext.php`` embeds and
    the equivalent ``daxab.com`` URLs. Videos that are merely embeds of
    another site are delegated to the matching extractor.
    """

    IE_NAME = 'vk'
    IE_DESC = 'VK'
    _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1']
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:
                                (?:(?:m|new)\.)?vk\.com/video_|
                                (?:www\.)?daxab\.com/
                            )
                            ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)|
                            (?:
                                (?:(?:m|new)\.)?vk\.com/(?:.+?\?.*?z=)?(?:video|clip)|
                                (?:www\.)?daxab\.com/embed/
                            )
                            (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>([\da-f]+)|(ln-[\da-zA-Z]+)))?
                        )
                    '''

    _TESTS = [
        {
            'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
            'info_dict': {
                'id': '-77521_162222515',
                'ext': 'mp4',
                'title': 'ProtivoGunz - Хуёвая песня',
                'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
                'uploader_id': '39545378',
                'duration': 195,
                'timestamp': 1329049880,
                'upload_date': '20120212',
                'comment_count': int,
                'like_count': int,
                'thumbnail': r're:https?://.+\.jpg$',
            },
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'http://vk.com/video205387401_165548505',
            'info_dict': {
                'id': '205387401_165548505',
                'ext': 'mp4',
                'title': 'No name',
                'uploader': 'Tom Cruise',
                'uploader_id': '205387401',
                'duration': 9,
                'timestamp': 1374364108,
                'upload_date': '20130720',
                'comment_count': int,
                'like_count': int,
                'thumbnail': r're:https?://.+\.jpg$',
            }
        },
        {
            'note': 'Embedded video',
            'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa',
            'info_dict': {
                'id': '-77521_162222515',
                'ext': 'mp4',
                'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
                'title': 'ProtivoGunz - Хуёвая песня',
                'duration': 195,
                'upload_date': '20120212',
                'timestamp': 1329049880,
                'uploader_id': '39545378',
                'thumbnail': r're:https?://.+\.jpg$',
            },
            'params': {'skip_download': 'm3u8'},
        },
        {
            # VIDEO NOW REMOVED
            # please update if you find a video whose URL follows the same pattern
            'url': 'http://vk.com/video-8871596_164049491',
            'md5': 'a590bcaf3d543576c9bd162812387666',
            'note': 'Only available for registered users',
            'info_dict': {
                'id': '-8871596_164049491',
                'ext': 'mp4',
                'uploader': 'Триллеры',
                'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
                'duration': 8352,
                'upload_date': '20121218',
                'view_count': int,
            },
            'skip': 'Removed',
        },
        {
            'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d',
            'info_dict': {
                'id': '-43215063_168067957',
                'ext': 'mp4',
                'uploader': 'Bro Mazter',
                'title': ' ',
                'duration': 7291,
                'upload_date': '20140328',
                'uploader_id': '223413403',
                'timestamp': 1396018030,
            },
            'skip': 'Requires vk account credentials',
        },
        {
            'url': 'http://m.vk.com/video-43215063_169084319?list=125c627d1aa1cebb83&from=wall-43215063_2566540',
            'md5': '0c45586baa71b7cb1d0784ee3f4e00a6',
            'note': 'ivi.ru embed',
            'info_dict': {
                'id': '-43215063_169084319',
                'ext': 'mp4',
                'title': 'Книга Илая',
                'duration': 6771,
                'upload_date': '20140626',
                'view_count': int,
            },
            'skip': 'Removed',
        },
        {
            'url': 'https://vk.com/video-93049196_456239755?list=ln-cBjJ7S4jYYx3ADnmDT',
            'info_dict': {
                'id': '-93049196_456239755',
                'ext': 'mp4',
                'title': '8 серия (озвучка)',
                'duration': 8383,
                'comment_count': int,
                'uploader': 'Dizi2021',
                'like_count': int,
                'timestamp': 1640162189,
                'upload_date': '20211222',
                'uploader_id': '-93049196',
                'thumbnail': r're:https?://.+\.jpg$',
            },
        },
        {
            # video (removed?) only available with list id
            'url': 'https://vk.com/video30481095_171201961?list=8764ae2d21f14088d4',
            'md5': '091287af5402239a1051c37ec7b92913',
            'info_dict': {
                'id': '30481095_171201961',
                'ext': 'mp4',
                'title': 'ТюменцевВВ_09.07.2015',
                'uploader': 'Anton Ivanov',
                'duration': 109,
                'upload_date': '20150709',
                'view_count': int,
            },
            'skip': 'Removed',
        },
        {
            # youtube embed
            'url': 'https://vk.com/video276849682_170681728',
            'info_dict': {
                'id': 'V3K4mi0SYkc',
                'ext': 'mp4',
                'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
                'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
                'duration': 178,
                'upload_date': '20130117',
                'uploader': "Children's Joy Foundation Inc.",
                'uploader_id': 'thecjf',
                'view_count': int,
                'channel_id': 'UCgzCNQ11TmR9V97ECnhi3gw',
                'availability': 'public',
                'like_count': int,
                'live_status': 'not_live',
                'playable_in_embed': True,
                'channel': 'Children\'s Joy Foundation Inc.',
                'uploader_url': 'http://www.youtube.com/user/thecjf',
                'thumbnail': r're:https?://.+\.jpg$',
                'tags': 'count:27',
                'start_time': 0.0,
                'categories': ['Nonprofits & Activism'],
                'channel_url': 'https://www.youtube.com/channel/UCgzCNQ11TmR9V97ECnhi3gw',
                'age_limit': 0,
            },
        },
        {
            # dailymotion embed
            'url': 'https://vk.com/video-37468416_456239855',
            'info_dict': {
                'id': 'k3lz2cmXyRuJQSjGHUv',
                'ext': 'mp4',
                'title': 'md5:d52606645c20b0ddbb21655adaa4f56f',
                'description': 'md5:424b8e88cc873217f520e582ba28bb36',
                'uploader': 'AniLibria.Tv',
                'upload_date': '20160914',
                'uploader_id': 'x1p5vl5',
                'timestamp': 1473877246,
            },
            'skip': 'Removed'
        },
        {
            # video key is extra_data not url\d+
            'url': 'http://vk.com/video-110305615_171782105',
            'md5': 'e13fcda136f99764872e739d13fac1d1',
            'info_dict': {
                'id': '-110305615_171782105',
                'ext': 'mp4',
                'title': 'S-Dance, репетиции к The way show',
                'uploader': 'THE WAY SHOW | 17 апреля',
                'uploader_id': '-110305615',
                'timestamp': 1454859345,
                'upload_date': '20160207',
            },
            'skip': 'Removed',
        },
        {
            # finished live stream, postlive_mp4
            'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
            'info_dict': {
                'id': '-387766_456242764',
                'ext': 'mp4',
                'title': 'ИгроМир 2016 День 1 — Игромания Утром',
                'uploader': 'Игромания',
                'duration': 5239,
                'upload_date': '20160929',
                'uploader_id': '-387766',
                'timestamp': 1475137527,
                'thumbnail': r're:https?://.+\.jpg$',
                'comment_count': int,
                'like_count': int,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # live stream, hls and rtmp links, most likely already finished live
            # stream by the time you are reading this comment
            'url': 'https://vk.com/video-140332_456239111',
            'only_matching': True,
        },
        {
            # removed video, just testing that we match the pattern
            'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
            'only_matching': True,
        },
        {
            # age restricted video, requires vk account credentials
            'url': 'https://vk.com/video205387401_164765225',
            'only_matching': True,
        },
        {
            # pladform embed
            'url': 'https://vk.com/video-76116461_171554880',
            'only_matching': True,
        },
        {
            'url': 'http://new.vk.com/video205387401_165548505',
            'only_matching': True,
        },
        {
            # This video is no longer available, because its author has been blocked.
            'url': 'https://vk.com/video-10639516_456240611',
            'only_matching': True,
        },
        {
            # The video is not available in your region.
            'url': 'https://vk.com/video-51812607_171445436',
            'only_matching': True,
        },
        {
            'url': 'https://vk.com/clip30014565_456240946',
            'only_matching': True,
        }]

    def _real_extract(self, url):
        """Extract one video, or hand over to another extractor for third-party embeds.

        Regular video/clip URLs are resolved through the ``al_video`` AJAX
        endpoint; ``video_ext.php`` embed URLs are resolved by downloading the
        embed page and parsing its ``playerParams``.

        Returns an info dict for VK-hosted media, or a ``url_result`` when the
        page only embeds a video from another site.
        Raises ExtractorError for the known "video unavailable" messages and
        when no player parameters can be found.
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('videoid')

        mv_data = {}
        if video_id:
            query = {
                'act': 'show',
                'video': video_id,
            }
            # Some videos (removed?) can only be downloaded with list id specified
            list_id = mobj.group('list_id')
            if list_id:
                query['list'] = list_id

            payload = self._download_payload('al_video', video_id, query)
            info_page = payload[1]
            opts = payload[-1]
            mv_data = opts.get('mvData') or {}
            player = opts.get('player') or {}
        else:
            # Embed URL: the id is assembled from the oid/id query parameters
            video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))

            info_page = self._download_webpage(
                'http://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id)

            error_message = self._html_search_regex(
                [r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
                 r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'],
                info_page, 'error message', default=None)
            if error_message:
                raise ExtractorError(error_message, expected=True)

            if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
                raise ExtractorError(
                    'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
                    expected=True)

            ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.'

            # Page fragment (regex) -> message template taking the video id
            ERRORS = {
                r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
                ERROR_COPYRIGHT,

                r'>The video .*? was removed from public access by request of the copyright holder.<':
                ERROR_COPYRIGHT,

                r'<!>Please log in or <':
                'Video %s is only available for registered users, '
                'use --username and --password options to provide account credentials.',

                r'<!>Unknown error':
                'Video %s does not exist.',

                r'<!>Видео временно недоступно':
                'Video %s is temporarily unavailable.',

                r'<!>Access denied':
                'Access denied to video %s.',

                r'<!>Видеозапись недоступна, так как её автор был заблокирован.':
                'Video %s is no longer available, because its author has been blocked.',

                r'<!>This video is no longer available, because its author has been blocked.':
                'Video %s is no longer available, because its author has been blocked.',

                r'<!>This video is no longer available, because it has been deleted.':
                'Video %s is no longer available, because it has been deleted.',

                r'<!>The video .+? is not available in your region.':
                'Video %s is not available in your region.',
            }

            for error_re, error_msg in ERRORS.items():
                if re.search(error_re, info_page):
                    raise ExtractorError(error_msg % video_id, expected=True)

            player = self._parse_json(self._search_regex(
                r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n',
                info_page, 'player params'), video_id)

        # Third-party embeds are tried first, in a fixed order; the first hit wins
        youtube_url = YoutubeIE._extract_url(info_page)
        if youtube_url:
            return self.url_result(youtube_url, YoutubeIE.ie_key())

        vimeo_url = VimeoIE._extract_url(url, info_page)
        if vimeo_url is not None:
            return self.url_result(vimeo_url, VimeoIE.ie_key())

        pladform_url = PladformIE._extract_url(info_page)
        if pladform_url:
            return self.url_result(pladform_url, PladformIE.ie_key())

        m_rutube = re.search(
            r'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page)
        if m_rutube is not None:
            # The URL may be backslash-escaped inside the page; drop the escapes
            rutube_url = self._proto_relative_url(
                m_rutube.group(1).replace('\\', ''))
            return self.url_result(rutube_url)

        dailymotion_url = next(DailymotionIE._extract_embed_urls(url, info_page), None)
        if dailymotion_url:
            return self.url_result(dailymotion_url, DailymotionIE.ie_key())

        odnoklassniki_url = OdnoklassnikiIE._extract_url(info_page)
        if odnoklassniki_url:
            return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())

        sibnet_url = next(SibnetEmbedIE._extract_embed_urls(url, info_page), None)
        if sibnet_url:
            return self.url_result(sibnet_url)

        # Generic fallback: a `url: '...'` entry inside a `var opts = {...}` object
        m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
        if m_opts:
            m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))
            if m_opts_url:
                opts_url = m_opts_url.group(1)
                if opts_url.startswith('//'):
                    opts_url = 'http:' + opts_url
                return self.url_result(opts_url)

        # `player` may be an empty dict when the payload carried no player
        # data; report that as an extraction error rather than a bare KeyError
        try:
            data = player['params'][0]
        except (KeyError, IndexError, TypeError) as err:
            raise ExtractorError(f'Unable to find player parameters for video {video_id}') from err
        title = unescapeHTML(data['md_title'])

        # 2 = live
        # 3 = post live (finished live)
        is_live = data.get('live') == 2

        # Prefer the date shown on the page, fall back to the player's `date` field
        timestamp = unified_timestamp(self._html_search_regex(
            r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
            'upload date', default=None)) or int_or_none(data.get('date'))

        view_count = str_to_int(self._search_regex(
            r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
            info_page, 'view count', default=None))

        formats = []
        for format_id, format_url in data.items():
            format_url = url_or_none(format_url)
            if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
                continue
            if (format_id.startswith(('url', 'cache'))
                    or format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
                # Keys like `url720` / `cache480` carry the height as a numeric suffix
                height = int_or_none(self._search_regex(
                    r'^(?:url|cache)(\d+)', format_id, 'height', default=None))
                formats.append({
                    'format_id': format_id,
                    'url': format_url,
                    'height': height,
                })
            elif format_id == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id=format_id, fatal=False, live=is_live))
            elif format_id == 'rtmp':
                formats.append({
                    'format_id': format_id,
                    'url': format_url,
                    'ext': 'flv',
                })

        subtitles = {}
        for sub in data.get('subs') or ():
            sub_url = url_or_none(sub.get('url'))
            if not sub_url:
                # An entry without a usable URL cannot be downloaded; skip it
                continue
            subtitles.setdefault(sub.get('lang', 'en'), []).append({
                # The extension is taken from the subtitle's title (e.g. `name.vtt`)
                'ext': sub.get('title', '.srt').split('.')[-1],
                'url': sub_url,
            })

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'thumbnail': data.get('jpg'),
            'uploader': data.get('md_author'),
            'uploader_id': str_or_none(data.get('author_id') or mv_data.get('authorId')),
            'duration': int_or_none(data.get('duration') or mv_data.get('duration')),
            'timestamp': timestamp,
            'view_count': view_count,
            'like_count': int_or_none(mv_data.get('likes')),
            'comment_count': int_or_none(mv_data.get('commcount')),
            'is_live': is_live,
            'subtitles': subtitles,
        }
533
534
class VKUserVideosIE(VKBaseIE):
    """Playlist extractor for ``vk.com/video/@<name>`` pages and ``vk.com/video/playlist/<owner>_<id>`` playlists."""

    IE_NAME = 'vk:uservideos'
    IE_DESC = "VK - User's Videos"
    _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/video/(?:playlist/)?(?P<id>[^?$#/&]+)(?!\?.*\bz=video)(?:[/?#&](?:.*?\bsection=(?P<section>\w+))?|$)'
    _TEMPLATE_URL = 'https://vk.com/videos'
    _TESTS = [{
        'url': 'https://vk.com/video/@mobidevices',
        'info_dict': {
            'id': '-17892518_all',
        },
        'playlist_mincount': 1355,
    }, {
        'url': 'https://vk.com/video/@mobidevices?section=uploaded',
        'info_dict': {
            'id': '-17892518_uploaded',
        },
        'playlist_mincount': 182,
    }, {
        'url': 'https://vk.com/video/playlist/-174476437_2',
        'info_dict': {
            'id': '-174476437_2',
            'title': 'Анонсы'
        },
        'playlist_mincount': 108,
    }]
    # The first two items of every entry in the returned video list
    _VIDEO = collections.namedtuple('Video', ['owner_id', 'id'])

    def _entries(self, page_id, section):
        """Yield a ``url_result`` for every video of ``section`` on page ``page_id``.

        Pages are requested lazily through ``load_videos_silent``; the offset
        of each request is the running sum of the ``count`` values reported so
        far. Iteration stops once that sum reaches the ``total`` reported by
        the first page, or when a page reports no videos.
        """
        offset, total = 0, None
        while True:
            video_list_json = self._download_payload('al_video', page_id, {
                'act': 'load_videos_silent',
                'offset': offset,
                'oid': page_id,
                'section': section,
            })[0][section]
            if total is None:
                total = video_list_json['total']

            for video in video_list_json['list']:
                v = self._VIDEO._make(video[:2])
                video_id = '%d_%d' % (v.owner_id, v.id)
                yield self.url_result(
                    'http://vk.com/video' + video_id, VKIE.ie_key(), video_id)

            page_count = video_list_json['count']
            offset += page_count
            # A page reporting zero videos would never advance the offset, so
            # stop there instead of requesting the same page forever
            if not page_count or offset >= total:
                break

    def _real_extract(self, url):
        """Resolve the owner id and section from ``url`` and return the playlist result.

        ``@name`` ids are resolved through the ``data-owner-id`` attribute of
        the downloaded page; ``<owner>_<section>`` ids are split directly.
        The section defaults to ``all``.
        Raises ExtractorError (expected) for ids of neither form.
        """
        u_id, section = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, u_id)

        if u_id.startswith('@'):
            page_id = self._search_regex(r'data-owner-id\s?=\s?"([^"]+)"', webpage, 'page_id')
        elif '_' in u_id:
            # For playlist ids the part after the underscore is used as the section
            page_id, section = u_id.split('_', 1)
        else:
            raise ExtractorError('Invalid URL', expected=True)

        if not section:
            section = 'all'

        playlist_title = clean_html(get_element_by_class('VideoInfoPanel__title', webpage))
        return self.playlist_result(self._entries(page_id, section), '%s_%s' % (page_id, section), playlist_title)
606
607
class VKWallPostIE(VKBaseIE):
    """Playlist extractor for a VK wall post: its attached audio tracks and linked videos."""

    IE_NAME = 'vk:wallpost'
    _VALID_URL = r'https?://(?:(?:(?:(?:m|new)\.)?vk\.com/(?:[^?]+\?.*\bw=)?wall(?P<id>-?\d+_\d+)))'
    _TESTS = [{
        # public page URL, audio playlist
        'url': 'https://vk.com/bs.official?w=wall-23538238_35',
        'info_dict': {
            'id': '-23538238_35',
            'title': 'Black Shadow - Wall post -23538238_35',
            'description': 'md5:3f84b9c4f9ef499731cf1ced9998cc0c',
        },
        'playlist': [{
            'md5': '5ba93864ec5b85f7ce19a9af4af080f6',
            'info_dict': {
                'id': '135220665_111806521',
                'ext': 'mp4',
                'title': 'Black Shadow - Слепое Верование',
                'duration': 370,
                'uploader': 'Black Shadow',
                'artist': 'Black Shadow',
                'track': 'Слепое Верование',
            },
        }, {
            'md5': '4cc7e804579122b17ea95af7834c9233',
            'info_dict': {
                'id': '135220665_111802303',
                'ext': 'mp4',
                'title': 'Black Shadow - Война - Негасимое Бездны Пламя!',
                'duration': 423,
                'uploader': 'Black Shadow',
                'artist': 'Black Shadow',
                'track': 'Война - Негасимое Бездны Пламя!',
            },
        }],
        'params': {
            'skip_download': True,
        },
        'skip': 'Requires vk account credentials',
    }, {
        # single YouTube embed, no leading -
        'url': 'https://vk.com/wall85155021_6319',
        'info_dict': {
            'id': '85155021_6319',
            'title': 'Сергей Горбунов - Wall post 85155021_6319',
        },
        'playlist_count': 1,
        'skip': 'Requires vk account credentials',
    }, {
        # wall page URL
        'url': 'https://vk.com/wall-23538238_35',
        'only_matching': True,
    }, {
        # mobile wall page URL
        'url': 'https://m.vk.com/wall-23538238_35',
        'only_matching': True,
    }]
    # NOTE(review): '0' and 'O' are swapped relative to the standard base64
    # alphabet. Presumably this mirrors the alphabet VK itself uses for masked
    # audio URLs - confirm before "correcting" it.
    _BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/='
    # Positional layout of the first 16 items of a `data-audio` JSON array
    _AUDIO = collections.namedtuple('Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads'])

    def _decode(self, enc):
        """Decode ``enc`` using the ``_BASE64_CHARS`` alphabet and return the result as a str.

        Every character is mapped to its index in the alphabet and accumulated
        six bits at a time; one output character is emitted for each input
        character except the first of every group of four.
        Raises ValueError if ``enc`` contains a character outside the alphabet.
        """
        decoded = []
        e = n = 0
        for c in enc:
            r = self._BASE64_CHARS.index(c)
            cond = n % 4
            e = 64 * e + r if cond else r
            n += 1
            if cond:
                # Parsed as 255 & (e >> ((-2 * n) & 6)): take the next byte
                # out of the accumulated bits
                decoded.append(chr(255 & e >> (-2 * n & 6)))
        return ''.join(decoded)

    def _unmask_url(self, mask_url, vk_id):
        """Return the real audio URL hidden in ``mask_url``.

        URLs that do not contain ``audio_api_unavailable`` are returned
        unchanged. Otherwise the ``extra`` query value holds two encoded
        parts separated by ``#``: the shuffled URL and a ``<name>\\x0b<base>``
        descriptor. ``base`` XOR ``vk_id`` seeds the index sequence used to
        undo the shuffle.
        """
        if 'audio_api_unavailable' in mask_url:
            extra = mask_url.split('?extra=')[1].split('#')
            # Only the second half of the descriptor is needed; the first is unused here
            _, base = self._decode(extra[1]).split(chr(11))
            mask_url = list(self._decode(extra[0]))
            url_len = len(mask_url)
            indexes = [None] * url_len
            index = int(base) ^ vk_id
            for n in range(url_len - 1, -1, -1):
                # Parsed as ((url_len * (n + 1)) ^ (index + n)) % url_len
                index = (url_len * (n + 1) ^ index + n) % url_len
                indexes[n] = index
            for n in range(1, url_len):
                index = indexes[url_len - 1 - n]
                mask_url[n], mask_url[index] = mask_url[index], mask_url[n]
            mask_url = ''.join(mask_url)
        return mask_url

    def _real_extract(self, url):
        """Return a playlist made of the post's audio tracks followed by its linked videos.

        Audio tracks are read from the ``data-audio`` attributes of the post's
        HTML; tracks without a URL are skipped. Video links are delegated to
        VKIE.
        """
        post_id = self._match_id(url)

        webpage = self._download_payload('wkview', post_id, {
            'act': 'show',
            'w': 'wall' + post_id,
        })[1]

        description = clean_html(get_element_by_class('wall_post_text', webpage))
        uploader = clean_html(get_element_by_class('author', webpage))

        entries = []

        for audio in re.findall(r'data-audio="([^"]+)', webpage):
            audio = self._parse_json(unescapeHTML(audio), post_id)
            a = self._AUDIO._make(audio[:16])
            if not a.url:
                continue
            title = unescapeHTML(a.title)
            performer = unescapeHTML(a.performer)
            entries.append({
                'id': '%s_%s' % (a.owner_id, a.id),
                'url': self._unmask_url(a.url, a.ads['vk_id']),
                'title': '%s - %s' % (performer, title) if performer else title,
                'thumbnails': [{'url': c_url} for c_url in a.cover_url.split(',')] if a.cover_url else None,
                'duration': int_or_none(a.duration),
                'uploader': uploader,
                'artist': performer,
                'track': title,
                'ext': 'mp4',
                'protocol': 'm3u8_native',
            })

        for video in re.finditer(
                r'<a[^>]+href=(["\'])(?P<url>/video(?:-?[\d_]+).*?)\1', webpage):
            entries.append(self.url_result(
                compat_urlparse.urljoin(url, video.group('url')), VKIE.ie_key()))

        title = 'Wall post %s' % post_id

        # orderedSet drops duplicate entries while keeping first-seen order
        return self.playlist_result(
            orderedSet(entries), post_id,
            '%s - %s' % (uploader, title) if uploader else title,
            description)