]> jfr.im git - yt-dlp.git/blame - youtube_dlc/extractor/vk.py
[skip travis] renaming
[yt-dlp.git] / youtube_dlc / extractor / vk.py
CommitLineData
dcdb292f 1# coding: utf-8
94a23d2a
PH
2from __future__ import unicode_literals
3
51815886 4import collections
0e6ec3ca 5import functools
60d142aa 6import re
60d142aa
JMF
7
8from .common import InfoExtractor
059cd768 9from ..compat import compat_urlparse
60d142aa 10from ..utils import (
2d19fb50 11 clean_html,
9032dc28 12 ExtractorError,
2d19fb50 13 get_element_by_class,
bf4b3b6b 14 int_or_none,
0e6ec3ca 15 OnDemandPagedList,
1cc79574 16 orderedSet,
ad1bc71a 17 str_or_none,
8117df4c 18 str_to_int,
60d142aa 19 unescapeHTML,
a7ee8a00 20 unified_timestamp,
3052a30d 21 url_or_none,
6e6bc8da 22 urlencode_postdata,
1cc79574 23)
e3845525 24from .dailymotion import DailymotionIE
3c989818 25from .odnoklassniki import OdnoklassnikiIE
c4737bea 26from .pladform import PladformIE
e3845525 27from .vimeo import VimeoIE
5113b691 28from .youtube import YoutubeIE
60d142aa
JMF
29
30
2d19fb50
S
class VKBaseIE(InfoExtractor):
    """Shared base for the VK extractors: login handling and AJAX payload download."""

    _NETRC_MACHINE = 'vk'

    def _login(self):
        """Log in to vk.com when credentials are available.

        Does nothing when no username is configured. Raises ExtractorError
        (expected) when the login response contains the ``onLoginFailed``
        marker.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        login_page, url_handle = self._download_webpage_handle(
            'https://vk.com', None, 'Downloading login page')

        # Start from the hidden inputs of the login page and add the
        # credentials on top; both are sent cp1251-encoded.
        login_form = self._hidden_inputs(login_page)

        login_form.update({
            'email': username.encode('cp1251'),
            'pass': password.encode('cp1251'),
        })

        # vk serves two same remixlhk cookies in Set-Cookie header and expects
        # first one to be actually set
        self._apply_first_set_cookie_header(url_handle, 'remixlhk')

        login_page = self._download_webpage(
            'https://login.vk.com/?act=login', None,
            note='Logging in',
            data=urlencode_postdata(login_form))

        if re.search(r'onLoginFailed', login_page):
            raise ExtractorError(
                'Unable to login, incorrect username and/or password', expected=True)

    def _real_initialize(self):
        """Perform the (optional) login before any extraction takes place."""
        self._login()

    def _download_payload(self, path, video_id, data, fatal=True):
        """POST *data* to ``https://vk.com/<path>.php`` and return its payload.

        The response JSON is expected to carry a two-item ``payload`` entry:
        a status code and the actual payload.

        path     -- name of the vk.com PHP endpoint, without the extension
        video_id -- identifier passed on to the download helper
        data     -- dict of POST fields; it is copied, the caller's dict is
                    left untouched
        fatal    -- forwarded to ``_download_json``

        Returns the payload, or None when ``fatal`` is false and no JSON
        object could be obtained.
        Status code '3' triggers ``raise_login_required()``; status code '8'
        raises an expected ExtractorError built from the payload.
        """
        # Work on a copy so that the 'al' flag does not leak into the dict
        # owned by the caller.
        post_data = dict(data)
        post_data['al'] = 1
        response = self._download_json(
            'https://vk.com/%s.php' % path, video_id,
            data=urlencode_postdata(post_data), fatal=fatal,
            headers={'X-Requested-With': 'XMLHttpRequest'})
        # NOTE(review): with fatal=False the download helper presumably returns
        # a non-dict value (False/None) on failure - confirm against
        # InfoExtractor._download_json. Subscripting it would raise TypeError.
        if not fatal and not isinstance(response, dict):
            return None
        code, payload = response['payload']
        if code == '3':
            self.raise_login_required()
        elif code == '8':
            # The first payload item carries the message; its first and last
            # characters are dropped before the markup is cleaned.
            raise ExtractorError(clean_html(payload[0][1:-1]), expected=True)
        return payload
76
2d19fb50
S
77
class VKIE(VKBaseIE):
    """Extractor for single VK videos (regular video pages and video_ext.php embeds)."""

    IE_NAME = 'vk'
    IE_DESC = 'VK'
    # Two URL families are recognised:
    #   * embeds  (.../video_ext.php?oid=..&id=..)  -> groups embed_query, oid, id
    #   * regular (.../video<owner>_<id>, optionally behind ?z=) -> groups
    #     videoid and, optionally, list_id
    # The dots of the host names are escaped so that they only match a
    # literal '.'.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:
                                (?:(?:m|new)\.)?vk\.com/video_|
                                (?:www\.)?daxab\.com/
                            )
                            ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)|
                            (?:
                                (?:(?:m|new)\.)?vk\.com/(?:.+?\?.*?z=)?video|
                                (?:www\.)?daxab\.com/embed/
                            )
                            (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>[\da-f]+))?
                        )
                    '''
    _TESTS = [
        {
            'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
            'md5': '7babad3b85ea2e91948005b1b8b0cb84',
            'info_dict': {
                'id': '-77521_162222515',
                'ext': 'mp4',
                'title': 'ProtivoGunz - Хуёвая песня',
                'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
                'uploader_id': '-77521',
                'duration': 195,
                'timestamp': 1329049880,
                'upload_date': '20120212',
            },
        },
        {
            'url': 'http://vk.com/video205387401_165548505',
            'info_dict': {
                'id': '205387401_165548505',
                'ext': 'mp4',
                'title': 'No name',
                'uploader': 'Tom Cruise',
                'uploader_id': '205387401',
                'duration': 9,
                'timestamp': 1374364108,
                'upload_date': '20130720',
            }
        },
        {
            'note': 'Embedded video',
            'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa',
            'md5': '7babad3b85ea2e91948005b1b8b0cb84',
            'info_dict': {
                'id': '-77521_162222515',
                'ext': 'mp4',
                'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
                'title': 'ProtivoGunz - Хуёвая песня',
                'duration': 195,
                'upload_date': '20120212',
                'timestamp': 1329049880,
                'uploader_id': '-77521',
            },
        },
        {
            # VIDEO NOW REMOVED
            # please update if you find a video whose URL follows the same pattern
            'url': 'http://vk.com/video-8871596_164049491',
            'md5': 'a590bcaf3d543576c9bd162812387666',
            'note': 'Only available for registered users',
            'info_dict': {
                'id': '-8871596_164049491',
                'ext': 'mp4',
                'uploader': 'Триллеры',
                'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
                'duration': 8352,
                'upload_date': '20121218',
                'view_count': int,
            },
            'skip': 'Removed',
        },
        {
            'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d',
            'info_dict': {
                'id': '-43215063_168067957',
                'ext': 'mp4',
                'uploader': 'Bro Mazter',
                'title': ' ',
                'duration': 7291,
                'upload_date': '20140328',
                'uploader_id': '223413403',
                'timestamp': 1396018030,
            },
            'skip': 'Requires vk account credentials',
        },
        {
            'url': 'http://m.vk.com/video-43215063_169084319?list=125c627d1aa1cebb83&from=wall-43215063_2566540',
            'md5': '0c45586baa71b7cb1d0784ee3f4e00a6',
            'note': 'ivi.ru embed',
            'info_dict': {
                'id': '-43215063_169084319',
                'ext': 'mp4',
                'title': 'Книга Илая',
                'duration': 6771,
                'upload_date': '20140626',
                'view_count': int,
            },
            'skip': 'Removed',
        },
        {
            # video (removed?) only available with list id
            'url': 'https://vk.com/video30481095_171201961?list=8764ae2d21f14088d4',
            'md5': '091287af5402239a1051c37ec7b92913',
            'info_dict': {
                'id': '30481095_171201961',
                'ext': 'mp4',
                'title': 'ТюменцевВВ_09.07.2015',
                'uploader': 'Anton Ivanov',
                'duration': 109,
                'upload_date': '20150709',
                'view_count': int,
            },
            'skip': 'Removed',
        },
        {
            # youtube embed
            'url': 'https://vk.com/video276849682_170681728',
            'info_dict': {
                'id': 'V3K4mi0SYkc',
                'ext': 'mp4',
                'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
                'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
                'duration': 178,
                'upload_date': '20130116',
                'uploader': "Children's Joy Foundation Inc.",
                'uploader_id': 'thecjf',
                'view_count': int,
            },
        },
        {
            # dailymotion embed
            'url': 'https://vk.com/video-37468416_456239855',
            'info_dict': {
                'id': 'k3lz2cmXyRuJQSjGHUv',
                'ext': 'mp4',
                'title': 'md5:d52606645c20b0ddbb21655adaa4f56f',
                'description': 'md5:424b8e88cc873217f520e582ba28bb36',
                'uploader': 'AniLibria.Tv',
                'upload_date': '20160914',
                'uploader_id': 'x1p5vl5',
                'timestamp': 1473877246,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # video key is extra_data not url\d+
            'url': 'http://vk.com/video-110305615_171782105',
            'md5': 'e13fcda136f99764872e739d13fac1d1',
            'info_dict': {
                'id': '-110305615_171782105',
                'ext': 'mp4',
                'title': 'S-Dance, репетиции к The way show',
                'uploader': 'THE WAY SHOW | 17 апреля',
                'uploader_id': '-110305615',
                'timestamp': 1454859345,
                'upload_date': '20160207',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # finished live stream, postlive_mp4
            'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
            'info_dict': {
                'id': '-387766_456242764',
                'ext': 'mp4',
                'title': 'ИгроМир 2016 День 1 — Игромания Утром',
                'uploader': 'Игромания',
                'duration': 5239,
                # TODO: use act=show to extract view_count
                # 'view_count': int,
                'upload_date': '20160929',
                'uploader_id': '-387766',
                'timestamp': 1475137527,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # live stream, hls and rtmp links, most likely already finished live
            # stream by the time you are reading this comment
            'url': 'https://vk.com/video-140332_456239111',
            'only_matching': True,
        },
        {
            # removed video, just testing that we match the pattern
            'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
            'only_matching': True,
        },
        {
            # age restricted video, requires vk account credentials
            'url': 'https://vk.com/video205387401_164765225',
            'only_matching': True,
        },
        {
            # pladform embed
            'url': 'https://vk.com/video-76116461_171554880',
            'only_matching': True,
        },
        {
            'url': 'http://new.vk.com/video205387401_165548505',
            'only_matching': True,
        },
        {
            # This video is no longer available, because its author has been blocked.
            'url': 'https://vk.com/video-10639516_456240611',
            'only_matching': True,
        },
        {
            # The video is not available in your region.
            'url': 'https://vk.com/video-51812607_171445436',
            'only_matching': True,
        }]

    def _real_extract(self, url):
        """Extract a single VK video.

        Regular video URLs are resolved through the ``al_video`` AJAX
        endpoint; embed URLs are resolved by downloading ``video_ext.php``
        and reading ``playerParams`` from it. When the page embeds a video of
        another supported site, a ``url_result`` pointing at that site is
        returned instead of a format list.

        Returns an info dict (or a url_result). Raises ExtractorError
        (expected) for the error pages recognised on embed pages.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')

        mv_data = {}
        if video_id:
            # Regular video page: ask the AJAX endpoint for the inline player.
            data = {
                'act': 'show_inline',
                'video': video_id,
            }
            # Some videos (removed?) can only be downloaded with list id specified
            list_id = mobj.group('list_id')
            if list_id:
                data['list'] = list_id

            payload = self._download_payload('al_video', video_id, data)
            # The second payload item is searched as markup below, the last
            # one is a dict of options holding 'mvData' and 'player'.
            info_page = payload[1]
            opts = payload[-1]
            mv_data = opts.get('mvData') or {}
            player = opts.get('player') or {}
        else:
            # Embed URL: the id is assembled from the oid/id query parameters.
            video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))

            info_page = self._download_webpage(
                'http://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id)

            error_message = self._html_search_regex(
                [r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
                 r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'],
                info_page, 'error message', default=None)
            if error_message:
                raise ExtractorError(error_message, expected=True)

            if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
                raise ExtractorError(
                    'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
                    expected=True)

            ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.'

            # Maps a pattern found on the page to the message reported to the
            # user; '%s' in the message is filled with the video id.
            ERRORS = {
                r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
                ERROR_COPYRIGHT,

                r'>The video .*? was removed from public access by request of the copyright holder.<':
                ERROR_COPYRIGHT,

                r'<!>Please log in or <':
                'Video %s is only available for registered users, '
                'use --username and --password options to provide account credentials.',

                r'<!>Unknown error':
                'Video %s does not exist.',

                r'<!>Видео временно недоступно':
                'Video %s is temporarily unavailable.',

                r'<!>Access denied':
                'Access denied to video %s.',

                r'<!>Видеозапись недоступна, так как её автор был заблокирован.':
                'Video %s is no longer available, because its author has been blocked.',

                r'<!>This video is no longer available, because its author has been blocked.':
                'Video %s is no longer available, because its author has been blocked.',

                r'<!>This video is no longer available, because it has been deleted.':
                'Video %s is no longer available, because it has been deleted.',

                r'<!>The video .+? is not available in your region.':
                'Video %s is not available in your region.',
            }

            for error_re, error_msg in ERRORS.items():
                if re.search(error_re, info_page):
                    raise ExtractorError(error_msg % video_id, expected=True)

            player = self._parse_json(self._search_regex(
                r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n',
                info_page, 'player params'), video_id)

        # Videos hosted elsewhere are delegated to the matching extractor.
        youtube_url = YoutubeIE._extract_url(info_page)
        if youtube_url:
            return self.url_result(youtube_url, YoutubeIE.ie_key())

        vimeo_url = VimeoIE._extract_url(url, info_page)
        if vimeo_url is not None:
            return self.url_result(vimeo_url, VimeoIE.ie_key())

        pladform_url = PladformIE._extract_url(info_page)
        if pladform_url:
            return self.url_result(pladform_url, PladformIE.ie_key())

        m_rutube = re.search(
            r'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page)
        if m_rutube is not None:
            # The URL may carry backslash escapes; drop them before use.
            rutube_url = self._proto_relative_url(
                m_rutube.group(1).replace('\\', ''))
            return self.url_result(rutube_url)

        dailymotion_urls = DailymotionIE._extract_urls(info_page)
        if dailymotion_urls:
            return self.url_result(dailymotion_urls[0], DailymotionIE.ie_key())

        odnoklassniki_url = OdnoklassnikiIE._extract_url(info_page)
        if odnoklassniki_url:
            return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())

        # Generic fallback: an external URL given in an inline "var opts".
        m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
        if m_opts:
            m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))
            if m_opts_url:
                opts_url = m_opts_url.group(1)
                if opts_url.startswith('//'):
                    opts_url = 'http:' + opts_url
                return self.url_result(opts_url)

        # Native VK video: everything below reads the first player params entry.
        data = player['params'][0]
        title = unescapeHTML(data['md_title'])

        # 2 = live
        # 3 = post live (finished live)
        is_live = data.get('live') == 2
        if is_live:
            title = self._live_title(title)

        # Prefer the date shown on the page, fall back to the 'date' field.
        timestamp = unified_timestamp(self._html_search_regex(
            r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
            'upload date', default=None)) or int_or_none(data.get('date'))

        view_count = str_to_int(self._search_regex(
            r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
            info_page, 'view count', default=None))

        formats = []
        for format_id, format_url in data.items():
            # Only entries whose value looks like a media URL are formats.
            format_url = url_or_none(format_url)
            if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
                continue
            if (format_id.startswith(('url', 'cache'))
                    or format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
                # Keys such as url720 / cache480 carry the height as suffix.
                height = int_or_none(self._search_regex(
                    r'^(?:url|cache)(\d+)', format_id, 'height', default=None))
                formats.append({
                    'format_id': format_id,
                    'url': format_url,
                    'height': height,
                })
            elif format_id == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id=format_id, fatal=False, live=is_live))
            elif format_id == 'rtmp':
                formats.append({
                    'format_id': format_id,
                    'url': format_url,
                    'ext': 'flv',
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'thumbnail': data.get('jpg'),
            'uploader': data.get('md_author'),
            'uploader_id': str_or_none(data.get('author_id') or mv_data.get('authorId')),
            'duration': int_or_none(data.get('duration') or mv_data.get('duration')),
            'timestamp': timestamp,
            'view_count': view_count,
            'like_count': int_or_none(mv_data.get('likes')),
            'comment_count': int_or_none(mv_data.get('commcount')),
            'is_live': is_live,
        }
469d4c89
WS
477
478
class VKUserVideosIE(VKBaseIE):
    """Playlist extractor for the ``/videos<owner id>`` listing pages of VK."""

    IE_NAME = 'vk:uservideos'
    IE_DESC = "VK - User's Videos"
    _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&](?:.*?\bsection=(?P<section>\w+))?|$)'
    _TEMPLATE_URL = 'https://vk.com/videos'
    _TESTS = [{
        'url': 'https://vk.com/videos-767561',
        'info_dict': {
            'id': '-767561_all',
        },
        'playlist_mincount': 1150,
    }, {
        'url': 'https://vk.com/videos-767561?section=uploaded',
        'info_dict': {
            'id': '-767561_uploaded',
        },
        'playlist_mincount': 425,
    }, {
        'url': 'http://vk.com/videos205387401',
        'only_matching': True,
    }, {
        'url': 'http://vk.com/videos-77521',
        'only_matching': True,
    }, {
        'url': 'http://vk.com/videos-97664626?section=all',
        'only_matching': True,
    }, {
        'url': 'http://m.vk.com/videos205387401',
        'only_matching': True,
    }, {
        'url': 'http://new.vk.com/videos205387401',
        'only_matching': True,
    }]
    # Offset step between two consecutive listing requests.
    _PAGE_SIZE = 1000
    # Names for the first two fields of a raw listing entry.
    _VIDEO = collections.namedtuple('Video', ['owner_id', 'id'])

    def _fetch_page(self, page_id, section, page):
        """Yield a url_result for every video on listing page number *page*.

        page_id -- owner id taken from the URL
        section -- listing section to request
        page    -- zero-based page index, converted to an offset
        """
        request = {
            'act': 'load_videos_silent',
            'offset': page * self._PAGE_SIZE,
            'oid': page_id,
            'section': section,
        }
        payload = self._download_payload('al_video', page_id, request)
        listing = payload[0][section]['list']

        for raw_entry in listing:
            # Only the first two fields of an entry are needed.
            entry = self._VIDEO(*raw_entry[:2])
            full_id = '%d_%d' % (entry.owner_id, entry.id)
            yield self.url_result(
                'http://vk.com/video' + full_id, VKIE.ie_key(), full_id)

    def _real_extract(self, url):
        """Return a lazily paged playlist named ``<owner id>_<section>``."""
        mobj = re.match(self._VALID_URL, url)
        page_id, section = mobj.group('id', 'section')
        # URLs without an explicit section use the 'all' listing.
        section = section or 'all'

        page_fetcher = functools.partial(self._fetch_page, page_id, section)
        entries = OnDemandPagedList(page_fetcher, self._PAGE_SIZE)

        playlist_id = '%s_%s' % (page_id, section)
        return self.playlist_result(entries, playlist_id)
2d19fb50
S
539
540
class VKWallPostIE(VKBaseIE):
    """Playlist extractor for VK wall posts (attached audio tracks and videos)."""

    IE_NAME = 'vk:wallpost'
    _VALID_URL = r'https?://(?:(?:(?:(?:m|new)\.)?vk\.com/(?:[^?]+\?.*\bw=)?wall(?P<id>-?\d+_\d+)))'
    _TESTS = [{
        # public page URL, audio playlist
        'url': 'https://vk.com/bs.official?w=wall-23538238_35',
        'info_dict': {
            'id': '-23538238_35',
            'title': 'Black Shadow - Wall post -23538238_35',
            'description': 'md5:3f84b9c4f9ef499731cf1ced9998cc0c',
        },
        'playlist': [{
            'md5': '5ba93864ec5b85f7ce19a9af4af080f6',
            'info_dict': {
                'id': '135220665_111806521',
                'ext': 'mp4',
                'title': 'Black Shadow - Слепое Верование',
                'duration': 370,
                'uploader': 'Black Shadow',
                'artist': 'Black Shadow',
                'track': 'Слепое Верование',
            },
        }, {
            'md5': '4cc7e804579122b17ea95af7834c9233',
            'info_dict': {
                'id': '135220665_111802303',
                'ext': 'mp4',
                'title': 'Black Shadow - Война - Негасимое Бездны Пламя!',
                'duration': 423,
                'uploader': 'Black Shadow',
                'artist': 'Black Shadow',
                'track': 'Война - Негасимое Бездны Пламя!',
            },
        }],
        'params': {
            'skip_download': True,
            'usenetrc': True,
        },
        'skip': 'Requires vk account credentials',
    }, {
        # single YouTube embed, no leading -
        'url': 'https://vk.com/wall85155021_6319',
        'info_dict': {
            'id': '85155021_6319',
            'title': 'Сергей Горбунов - Wall post 85155021_6319',
        },
        'playlist_count': 1,
        'params': {
            'usenetrc': True,
        },
        'skip': 'Requires vk account credentials',
    }, {
        # wall page URL
        'url': 'https://vk.com/wall-23538238_35',
        'only_matching': True,
    }, {
        # mobile wall page URL
        'url': 'https://m.vk.com/wall-23538238_35',
        'only_matching': True,
    }]
    # Alphabet used by _decode. NOTE: '0' and 'O' sit in each other's place
    # compared with an ordinary A-Z/0-9 ordering; presumably this mirrors the
    # site's own alphabet - do not "correct" it (TODO confirm).
    _BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/='
    # Field names for the first 16 items of a data-audio JSON array.
    _AUDIO = collections.namedtuple('Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads'])

    def _decode(self, enc):
        """Decode *enc*, a string over ``_BASE64_CHARS``, into a character string.

        Characters are consumed in groups of four 6-bit values; every
        character of a group except the first emits one output character.
        Raises ValueError when *enc* contains a character outside the
        alphabet.
        """
        decoded = []
        e = n = 0
        for c in enc:
            r = self._BASE64_CHARS.index(c)
            cond = n % 4
            # Restart the accumulator at the beginning of every group.
            e = 64 * e + r if cond else r
            n += 1
            if cond:
                decoded.append(chr(255 & e >> (-2 * n & 6)))
        # Joined once at the end instead of growing a string per character.
        return ''.join(decoded)

    def _unmask_url(self, mask_url, vk_id):
        """Return the real URL for a masked audio URL.

        URLs that do not contain ``audio_api_unavailable`` are returned
        unchanged. Otherwise the ``extra`` query value is split at '#': the
        first part decodes to the shuffled URL, the second part to an
        operation name and a base number separated by chr(11). The base
        number XORed with *vk_id* (an integer) seeds the index sequence used
        to undo the shuffle.
        """
        if 'audio_api_unavailable' in mask_url:
            extra = mask_url.split('?extra=')[1].split('#')
            # The operation name is not needed, only the base number.
            _, base = self._decode(extra[1]).split(chr(11))
            mask_url = list(self._decode(extra[0]))
            url_len = len(mask_url)
            indexes = [None] * url_len
            index = int(base) ^ vk_id
            for n in range(url_len - 1, -1, -1):
                index = (url_len * (n + 1) ^ index + n) % url_len
                indexes[n] = index
            for n in range(1, url_len):
                index = indexes[url_len - 1 - n]
                mask_url[n], mask_url[index] = mask_url[index], mask_url[n]
            mask_url = ''.join(mask_url)
        return mask_url

    def _real_extract(self, url):
        """Build a playlist from the audio tracks and video links of a wall post."""
        post_id = self._match_id(url)

        webpage = self._download_payload('wkview', post_id, {
            'act': 'show',
            'w': 'wall' + post_id,
        })[1]

        description = clean_html(get_element_by_class('wall_post_text', webpage))
        uploader = clean_html(get_element_by_class('author', webpage))

        entries = []

        # Audio attachments are described by JSON arrays in data-audio attributes.
        for audio in re.findall(r'data-audio="([^"]+)', webpage):
            audio = self._parse_json(unescapeHTML(audio), post_id)
            a = self._AUDIO._make(audio[:16])
            if not a.url:
                continue
            title = unescapeHTML(a.title)
            performer = unescapeHTML(a.performer)
            entries.append({
                'id': '%s_%s' % (a.owner_id, a.id),
                'url': self._unmask_url(a.url, a.ads['vk_id']),
                'title': '%s - %s' % (performer, title) if performer else title,
                'thumbnails': [{'url': c_url} for c_url in a.cover_url.split(',')] if a.cover_url else None,
                'duration': int_or_none(a.duration),
                'uploader': uploader,
                'artist': performer,
                'track': title,
                'ext': 'mp4',
                'protocol': 'm3u8',
            })

        # Linked videos are handed over to the single video extractor.
        for video in re.finditer(
                r'<a[^>]+href=(["\'])(?P<url>/video(?:-?[\d_]+).*?)\1', webpage):
            entries.append(self.url_result(
                compat_urlparse.urljoin(url, video.group('url')), VKIE.ie_key()))

        title = 'Wall post %s' % post_id

        return self.playlist_result(
            orderedSet(entries), post_id,
            '%s - %s' % (uploader, title) if uploader else title,
            description)