]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/vk.py
[ivi] improve error detection
[yt-dlp.git] / youtube_dl / extractor / vk.py
CommitLineData
dcdb292f 1# coding: utf-8
94a23d2a
PH
2from __future__ import unicode_literals
3
51815886 4import collections
60d142aa 5import re
60d142aa
JMF
6
7from .common import InfoExtractor
059cd768 8from ..compat import compat_urlparse
60d142aa 9from ..utils import (
2d19fb50 10 clean_html,
9032dc28 11 ExtractorError,
2d19fb50 12 get_element_by_class,
bf4b3b6b 13 int_or_none,
1cc79574 14 orderedSet,
ad1bc71a 15 str_or_none,
8117df4c 16 str_to_int,
60d142aa 17 unescapeHTML,
a7ee8a00 18 unified_timestamp,
3052a30d 19 url_or_none,
6e6bc8da 20 urlencode_postdata,
1cc79574 21)
e3845525 22from .dailymotion import DailymotionIE
3c989818 23from .odnoklassniki import OdnoklassnikiIE
c4737bea 24from .pladform import PladformIE
e3845525 25from .vimeo import VimeoIE
5113b691 26from .youtube import YoutubeIE
60d142aa
JMF
27
28
2d19fb50
S
class VKBaseIE(InfoExtractor):
    """Base class for the VK extractors: optional login plus the AJAX helper."""

    _NETRC_MACHINE = 'vk'

    def _login(self):
        """Log in to VK when credentials are available.

        Returns immediately when no username is configured.

        Raises:
            ExtractorError: if the login response contains ``onLoginFailed``.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        login_page, url_handle = self._download_webpage_handle(
            'https://vk.com', None, 'Downloading login page')

        # Start from the hidden inputs of the front page and add the
        # credentials, encoded as cp1251.
        login_form = self._hidden_inputs(login_page)
        login_form.update({
            'email': username.encode('cp1251'),
            'pass': password.encode('cp1251'),
        })

        # vk serves two same remixlhk cookies in Set-Cookie header and expects
        # first one to be actually set
        self._apply_first_set_cookie_header(url_handle, 'remixlhk')

        login_page = self._download_webpage(
            'https://login.vk.com/?act=login', None,
            note='Logging in',
            data=urlencode_postdata(login_form))

        if re.search(r'onLoginFailed', login_page):
            raise ExtractorError(
                'Unable to login, incorrect username and/or password', expected=True)

    def _real_initialize(self):
        """Attempt the login once when the extractor is initialized."""
        self._login()

    def _download_payload(self, path, video_id, data, fatal=True):
        """POST ``data`` to ``https://vk.com/<path>.php`` and return its payload.

        Args:
            path: script name without the ``.php`` suffix (e.g. ``al_video``).
            video_id: identifier forwarded to the download helper.
            data: dict of form fields; it is not modified, ``al=1`` is added
                to a copy before sending.
            fatal: forwarded to ``_download_json``.

        Returns:
            The second element of the response's ``payload`` pair, or ``None``
            when ``fatal`` is false and no response was obtained.

        Raises:
            ExtractorError: when the payload code is ``'8'`` (the message is
                taken from the payload), or via ``raise_login_required`` when
                the code is ``'3'``.
        """
        # Work on a copy so the caller's dict is left untouched.
        form = dict(data)
        form['al'] = 1
        response = self._download_json(
            'https://vk.com/%s.php' % path, video_id,
            data=urlencode_postdata(form), fatal=fatal,
            headers={'X-Requested-With': 'XMLHttpRequest'})
        if not fatal and not response:
            # NOTE(review): presumably a failed non-fatal download yields a
            # falsy value instead of raising - confirm against _download_json.
            return None
        code, payload = response['payload']
        if code == '3':
            self.raise_login_required()
        elif code == '8':
            # The first payload item carries the message wrapped in one extra
            # character on each side, which is stripped before cleaning.
            raise ExtractorError(clean_html(payload[0][1:-1]), expected=True)
        return payload
74
2d19fb50
S
75
class VKIE(VKBaseIE):
    """Extractor for single VK videos (regular pages and ``video_ext.php`` embeds)."""

    IE_NAME = 'vk'
    IE_DESC = 'VK'
    # Two URL shapes are accepted: the embed form (``video_ext.php`` / daxab
    # ``ext.php`` with ``oid`` and ``id`` query arguments) and the regular form
    # carrying ``<owner>_<id>`` directly, optionally followed by ``list=``.
    # The dot in ``daxab\.com`` is escaped so that it only matches a literal dot.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:
                                (?:(?:m|new)\.)?vk\.com/video_|
                                (?:www\.)?daxab\.com/
                            )
                            ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)|
                            (?:
                                (?:(?:m|new)\.)?vk\.com/(?:.+?\?.*?z=)?video|
                                (?:www\.)?daxab\.com/embed/
                            )
                            (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>[\da-f]+))?
                        )
                    '''
    _TESTS = [
        {
            'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
            'md5': '7babad3b85ea2e91948005b1b8b0cb84',
            'info_dict': {
                'id': '-77521_162222515',
                'ext': 'mp4',
                'title': 'ProtivoGunz - Хуёвая песня',
                'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
                'uploader_id': '-77521',
                'duration': 195,
                'timestamp': 1329049880,
                'upload_date': '20120212',
            },
        },
        {
            'url': 'http://vk.com/video205387401_165548505',
            'info_dict': {
                'id': '205387401_165548505',
                'ext': 'mp4',
                'title': 'No name',
                'uploader': 'Tom Cruise',
                'uploader_id': '205387401',
                'duration': 9,
                'timestamp': 1374364108,
                'upload_date': '20130720',
            }
        },
        {
            'note': 'Embedded video',
            'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa',
            'md5': '7babad3b85ea2e91948005b1b8b0cb84',
            'info_dict': {
                'id': '-77521_162222515',
                'ext': 'mp4',
                'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
                'title': 'ProtivoGunz - Хуёвая песня',
                'duration': 195,
                'upload_date': '20120212',
                'timestamp': 1329049880,
                'uploader_id': '-77521',
            },
        },
        {
            # VIDEO NOW REMOVED
            # please update if you find a video whose URL follows the same pattern
            'url': 'http://vk.com/video-8871596_164049491',
            'md5': 'a590bcaf3d543576c9bd162812387666',
            'note': 'Only available for registered users',
            'info_dict': {
                'id': '-8871596_164049491',
                'ext': 'mp4',
                'uploader': 'Триллеры',
                'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
                'duration': 8352,
                'upload_date': '20121218',
                'view_count': int,
            },
            'skip': 'Removed',
        },
        {
            'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d',
            'info_dict': {
                'id': '-43215063_168067957',
                'ext': 'mp4',
                'uploader': 'Bro Mazter',
                'title': ' ',
                'duration': 7291,
                'upload_date': '20140328',
                'uploader_id': '223413403',
                'timestamp': 1396018030,
            },
            'skip': 'Requires vk account credentials',
        },
        {
            'url': 'http://m.vk.com/video-43215063_169084319?list=125c627d1aa1cebb83&from=wall-43215063_2566540',
            'md5': '0c45586baa71b7cb1d0784ee3f4e00a6',
            'note': 'ivi.ru embed',
            'info_dict': {
                'id': '-43215063_169084319',
                'ext': 'mp4',
                'title': 'Книга Илая',
                'duration': 6771,
                'upload_date': '20140626',
                'view_count': int,
            },
            'skip': 'Removed',
        },
        {
            # video (removed?) only available with list id
            'url': 'https://vk.com/video30481095_171201961?list=8764ae2d21f14088d4',
            'md5': '091287af5402239a1051c37ec7b92913',
            'info_dict': {
                'id': '30481095_171201961',
                'ext': 'mp4',
                'title': 'ТюменцевВВ_09.07.2015',
                'uploader': 'Anton Ivanov',
                'duration': 109,
                'upload_date': '20150709',
                'view_count': int,
            },
            'skip': 'Removed',
        },
        {
            # youtube embed
            'url': 'https://vk.com/video276849682_170681728',
            'info_dict': {
                'id': 'V3K4mi0SYkc',
                'ext': 'mp4',
                'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
                'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
                'duration': 178,
                'upload_date': '20130116',
                'uploader': "Children's Joy Foundation Inc.",
                'uploader_id': 'thecjf',
                'view_count': int,
            },
        },
        {
            # dailymotion embed
            'url': 'https://vk.com/video-37468416_456239855',
            'info_dict': {
                'id': 'k3lz2cmXyRuJQSjGHUv',
                'ext': 'mp4',
                'title': 'md5:d52606645c20b0ddbb21655adaa4f56f',
                # TODO: fix test by fixing dailymotion description extraction
                'description': 'md5:c651358f03c56f1150b555c26d90a0fd',
                'uploader': 'AniLibria.Tv',
                'upload_date': '20160914',
                'uploader_id': 'x1p5vl5',
                'timestamp': 1473877246,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # video key is extra_data not url\d+
            'url': 'http://vk.com/video-110305615_171782105',
            'md5': 'e13fcda136f99764872e739d13fac1d1',
            'info_dict': {
                'id': '-110305615_171782105',
                'ext': 'mp4',
                'title': 'S-Dance, репетиции к The way show',
                'uploader': 'THE WAY SHOW | 17 апреля',
                'uploader_id': '-110305615',
                'timestamp': 1454859345,
                'upload_date': '20160207',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # finished live stream, postlive_mp4
            'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
            'info_dict': {
                'id': '-387766_456242764',
                'ext': 'mp4',
                'title': 'ИгроМир 2016 День 1 — Игромания Утром',
                'uploader': 'Игромания',
                'duration': 5239,
                # TODO: use act=show to extract view_count
                # 'view_count': int,
                'upload_date': '20160929',
                'uploader_id': '-387766',
                'timestamp': 1475137527,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # live stream, hls and rtmp links, most likely already finished live
            # stream by the time you are reading this comment
            'url': 'https://vk.com/video-140332_456239111',
            'only_matching': True,
        },
        {
            # removed video, just testing that we match the pattern
            'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
            'only_matching': True,
        },
        {
            # age restricted video, requires vk account credentials
            'url': 'https://vk.com/video205387401_164765225',
            'only_matching': True,
        },
        {
            # pladform embed
            'url': 'https://vk.com/video-76116461_171554880',
            'only_matching': True,
        },
        {
            'url': 'http://new.vk.com/video205387401_165548505',
            'only_matching': True,
        },
        {
            # This video is no longer available, because its author has been blocked.
            'url': 'https://vk.com/video-10639516_456240611',
            'only_matching': True,
        },
        {
            # The video is not available in your region.
            'url': 'https://vk.com/video-51812607_171445436',
            'only_matching': True,
        }]

    def _real_extract(self, url):
        """Extract one video, or hand it over to the extractor of its real host.

        Regular URLs (``videoid`` group present) are resolved through the
        ``al_video`` ``show_inline`` AJAX call; embed URLs are resolved by
        downloading ``video_ext.php`` and parsing ``playerParams`` from it.
        If the page embeds YouTube, Vimeo, Pladform, Rutube, Dailymotion or
        Odnoklassniki content, or exposes an external ``opts`` URL, a URL
        result for that target is returned instead.

        Raises:
            ExtractorError: for the known error pages of the embed view.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')

        mv_data = {}
        if video_id:
            data = {
                'act': 'show_inline',
                'video': video_id,
            }
            # Some videos (removed?) can only be downloaded with list id specified
            list_id = mobj.group('list_id')
            if list_id:
                data['list'] = list_id

            payload = self._download_payload('al_video', video_id, data)
            info_page = payload[1]
            opts = payload[-1]
            mv_data = opts.get('mvData') or {}
            player = opts.get('player') or {}
        else:
            # Embed URL: the id is assembled from the oid and id query arguments.
            video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))

            info_page = self._download_webpage(
                'http://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id)

            error_message = self._html_search_regex(
                [r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
                 r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'],
                info_page, 'error message', default=None)
            if error_message:
                raise ExtractorError(error_message, expected=True)

            if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
                raise ExtractorError(
                    'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
                    expected=True)

            ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.'

            # Page marker (regex) -> message template taking the video id.
            ERRORS = {
                r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
                ERROR_COPYRIGHT,

                r'>The video .*? was removed from public access by request of the copyright holder.<':
                ERROR_COPYRIGHT,

                r'<!>Please log in or <':
                'Video %s is only available for registered users, '
                'use --username and --password options to provide account credentials.',

                r'<!>Unknown error':
                'Video %s does not exist.',

                r'<!>Видео временно недоступно':
                'Video %s is temporarily unavailable.',

                r'<!>Access denied':
                'Access denied to video %s.',

                r'<!>Видеозапись недоступна, так как её автор был заблокирован.':
                'Video %s is no longer available, because its author has been blocked.',

                r'<!>This video is no longer available, because its author has been blocked.':
                'Video %s is no longer available, because its author has been blocked.',

                r'<!>This video is no longer available, because it has been deleted.':
                'Video %s is no longer available, because it has been deleted.',

                r'<!>The video .+? is not available in your region.':
                'Video %s is not available in your region.',
            }

            for error_re, error_msg in ERRORS.items():
                if re.search(error_re, info_page):
                    raise ExtractorError(error_msg % video_id, expected=True)

            player = self._parse_json(self._search_regex(
                r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n',
                info_page, 'player params'), video_id)

        # Third-party embeds are delegated to their own extractors.
        youtube_url = YoutubeIE._extract_url(info_page)
        if youtube_url:
            return self.url_result(youtube_url, YoutubeIE.ie_key())

        vimeo_url = VimeoIE._extract_url(url, info_page)
        if vimeo_url is not None:
            return self.url_result(vimeo_url, VimeoIE.ie_key())

        pladform_url = PladformIE._extract_url(info_page)
        if pladform_url:
            return self.url_result(pladform_url, PladformIE.ie_key())

        m_rutube = re.search(
            r'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page)
        if m_rutube is not None:
            # The matched URL may contain backslashes, which are dropped here.
            rutube_url = self._proto_relative_url(
                m_rutube.group(1).replace('\\', ''))
            return self.url_result(rutube_url)

        dailymotion_urls = DailymotionIE._extract_urls(info_page)
        if dailymotion_urls:
            return self.url_result(dailymotion_urls[0], DailymotionIE.ie_key())

        odnoklassniki_url = OdnoklassnikiIE._extract_url(info_page)
        if odnoklassniki_url:
            return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())

        m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
        if m_opts:
            m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))
            if m_opts_url:
                opts_url = m_opts_url.group(1)
                if opts_url.startswith('//'):
                    opts_url = 'http:' + opts_url
                return self.url_result(opts_url)

        data = player['params'][0]
        title = unescapeHTML(data['md_title'])

        # 2 = live
        # 3 = post live (finished live)
        is_live = data.get('live') == 2
        if is_live:
            title = self._live_title(title)

        # Prefer the date shown on the page, fall back to the player's value.
        timestamp = unified_timestamp(self._html_search_regex(
            r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
            'upload date', default=None)) or int_or_none(data.get('date'))

        view_count = str_to_int(self._search_regex(
            r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
            info_page, 'view count', default=None))

        formats = []
        for format_id, format_url in data.items():
            # Only entries whose value looks like a media URL are formats.
            format_url = url_or_none(format_url)
            if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
                continue
            if (format_id.startswith(('url', 'cache'))
                    or format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
                # url<N> / cache<N> keys carry the height in their name.
                height = int_or_none(self._search_regex(
                    r'^(?:url|cache)(\d+)', format_id, 'height', default=None))
                formats.append({
                    'format_id': format_id,
                    'url': format_url,
                    'height': height,
                })
            elif format_id == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id=format_id, fatal=False, live=is_live))
            elif format_id == 'rtmp':
                formats.append({
                    'format_id': format_id,
                    'url': format_url,
                    'ext': 'flv',
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'thumbnail': data.get('jpg'),
            'uploader': data.get('md_author'),
            'uploader_id': str_or_none(data.get('author_id') or mv_data.get('authorId')),
            'duration': int_or_none(data.get('duration') or mv_data.get('duration')),
            'timestamp': timestamp,
            'view_count': view_count,
            'like_count': int_or_none(mv_data.get('likes')),
            'comment_count': int_or_none(mv_data.get('commcount')),
            'is_live': is_live,
        }
469d4c89
WS
476
477
class VKUserVideosIE(VKBaseIE):
    """Playlist extractor for the video list of a VK page (``/videos<id>``)."""

    IE_NAME = 'vk:uservideos'
    IE_DESC = "VK - User's Videos"
    _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)'
    _TEMPLATE_URL = 'https://vk.com/videos'
    _TESTS = [{
        'url': 'http://vk.com/videos205387401',
        'info_dict': {
            'id': '205387401',
        },
        'playlist_mincount': 4,
    }, {
        'url': 'http://vk.com/videos-77521',
        'only_matching': True,
    }, {
        'url': 'http://vk.com/videos-97664626?section=all',
        'only_matching': True,
    }, {
        'url': 'http://m.vk.com/videos205387401',
        'only_matching': True,
    }, {
        'url': 'http://new.vk.com/videos205387401',
        'only_matching': True,
    }]
    # Field order of one row of the video list; kept as a description of the
    # row layout. _real_extract only reads the first two positions.
    _VIDEO = collections.namedtuple(
        'Video', ['owner_id', 'id', 'thumb', 'title', 'flags', 'duration', 'hash', 'moder_acts', 'owner', 'date', 'views', 'platform', 'blocked', 'music_video_meta'])

    def _real_extract(self, url):
        """Return a playlist with one VK url result per listed video."""
        page_id = self._match_id(url)

        videos = self._download_payload('al_video', page_id, {
            'act': 'load_videos_silent',
            'oid': page_id,
        })[0]['']['list']

        entries = []
        for video in videos:
            # Only owner_id and id (the first two fields of _VIDEO) are needed.
            # Unpacking just those avoids namedtuple._make, which raises
            # TypeError unless a row has exactly as many items as _VIDEO has
            # fields.
            owner_id, item_id = video[:2]
            video_id = '%d_%d' % (owner_id, item_id)
            entries.append(self.url_result(
                'http://vk.com/video' + video_id, 'VK', video_id=video_id))

        return self.playlist_result(entries, page_id)
2d19fb50
S
521
522
class VKWallPostIE(VKBaseIE):
    """Playlist extractor for the audio tracks and videos attached to a wall post."""

    IE_NAME = 'vk:wallpost'
    _VALID_URL = r'https?://(?:(?:(?:(?:m|new)\.)?vk\.com/(?:[^?]+\?.*\bw=)?wall(?P<id>-?\d+_\d+)))'
    _TESTS = [{
        # public page URL, audio playlist
        'url': 'https://vk.com/bs.official?w=wall-23538238_35',
        'info_dict': {
            'id': '-23538238_35',
            'title': 'Black Shadow - Wall post -23538238_35',
            'description': 'md5:3f84b9c4f9ef499731cf1ced9998cc0c',
        },
        'playlist': [{
            'md5': '5ba93864ec5b85f7ce19a9af4af080f6',
            'info_dict': {
                'id': '135220665_111806521',
                'ext': 'mp4',
                'title': 'Black Shadow - Слепое Верование',
                'duration': 370,
                'uploader': 'Black Shadow',
                'artist': 'Black Shadow',
                'track': 'Слепое Верование',
            },
        }, {
            'md5': '4cc7e804579122b17ea95af7834c9233',
            'info_dict': {
                'id': '135220665_111802303',
                'ext': 'mp4',
                'title': 'Black Shadow - Война - Негасимое Бездны Пламя!',
                'duration': 423,
                'uploader': 'Black Shadow',
                'artist': 'Black Shadow',
                'track': 'Война - Негасимое Бездны Пламя!',
            },
        }],
        'params': {
            'skip_download': True,
            'usenetrc': True,
        },
        'skip': 'Requires vk account credentials',
    }, {
        # single YouTube embed, no leading -
        'url': 'https://vk.com/wall85155021_6319',
        'info_dict': {
            'id': '85155021_6319',
            'title': 'Сергей Горбунов - Wall post 85155021_6319',
        },
        'playlist_count': 1,
        'params': {
            'usenetrc': True,
        },
        'skip': 'Requires vk account credentials',
    }, {
        # wall page URL
        'url': 'https://vk.com/wall-23538238_35',
        'only_matching': True,
    }, {
        # mobile wall page URL
        'url': 'https://m.vk.com/wall-23538238_35',
        'only_matching': True,
    }]
    # Alphabet used by _decode; a character's position is its 6-bit value.
    _BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/='
    # Field order of one decoded ``data-audio`` JSON array.
    _AUDIO = collections.namedtuple(
        'Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads', 'subtitle', 'main_artists', 'feat_artists', 'album', 'track_code', 'restriction', 'album_part', 'new_stats', 'access_key'])

    def _decode(self, enc):
        """Decode ``enc`` using the ``_BASE64_CHARS`` alphabet.

        Characters are consumed in groups of four; every character except the
        first of a group yields one output character.

        Raises:
            ValueError: if ``enc`` contains a character outside the alphabet.
        """
        decoded = []
        e = n = 0
        for c in enc:
            r = self._BASE64_CHARS.index(c)
            cond = n % 4
            # Restart the accumulator on the first character of each group.
            e = 64 * e + r if cond else r
            n += 1
            if cond:
                decoded.append(chr(255 & e >> (-2 * n & 6)))
        return ''.join(decoded)

    def _unmask_url(self, mask_url, vk_id):
        """Return the real audio URL for a masked ``audio_api_unavailable`` URL.

        URLs without the ``audio_api_unavailable`` marker are returned
        unchanged. Otherwise the part after ``?extra=`` is split on ``#``: the
        first piece decodes to the shuffled URL, the second to a function name
        and a base number separated by ``chr(11)``. The base is XORed with
        ``vk_id`` to seed the index sequence used to undo the shuffle.
        """
        if 'audio_api_unavailable' in mask_url:
            extra = mask_url.split('?extra=')[1].split('#')
            # func is decoded but not used.
            func, base = self._decode(extra[1]).split(chr(11))
            mask_url = list(self._decode(extra[0]))
            url_len = len(mask_url)
            indexes = [None] * url_len
            index = int(base) ^ vk_id
            for n in range(url_len - 1, -1, -1):
                index = (url_len * (n + 1) ^ index + n) % url_len
                indexes[n] = index
            for n in range(1, url_len):
                c = mask_url[n]
                index = indexes[url_len - 1 - n]
                mask_url[n] = mask_url[index]
                mask_url[index] = c
            mask_url = ''.join(mask_url)
        return mask_url

    def _real_extract(self, url):
        """Return a playlist of the post's audio tracks and linked VK videos."""
        post_id = self._match_id(url)

        webpage = self._download_payload('wkview', post_id, {
            'act': 'show',
            'w': 'wall' + post_id,
        })[1]

        description = clean_html(get_element_by_class('wall_post_text', webpage))
        uploader = clean_html(get_element_by_class('author', webpage))

        entries = []

        field_count = len(self._AUDIO._fields)
        for audio in re.findall(r'data-audio="([^"]+)', webpage):
            audio = self._parse_json(unescapeHTML(audio), post_id)
            # Ignore any items beyond the known fields: namedtuple._make
            # raises TypeError when given more items than declared fields.
            a = self._AUDIO._make(audio[:field_count])
            if not a.url:
                continue
            title = unescapeHTML(a.title)
            entries.append({
                'id': '%s_%s' % (a.owner_id, a.id),
                'url': self._unmask_url(a.url, a.ads['vk_id']),
                'title': '%s - %s' % (a.performer, title) if a.performer else title,
                # cover_url is a comma-separated list of image URLs, so it is
                # exposed as 'thumbnails' (list of dicts) rather than as a
                # list under the single-URL 'thumbnail' key.
                'thumbnails': [
                    {'url': cover_url} for cover_url in a.cover_url.split(',')
                ] if a.cover_url else None,
                'duration': a.duration,
                'uploader': uploader,
                'artist': a.performer,
                'track': title,
                'ext': 'mp4',
                'protocol': 'm3u8',
            })

        for video in re.finditer(
                r'<a[^>]+href=(["\'])(?P<url>/video(?:-?[\d_]+).*?)\1', webpage):
            entries.append(self.url_result(
                compat_urlparse.urljoin(url, video.group('url')), VKIE.ie_key()))

        title = 'Wall post %s' % post_id

        return self.playlist_result(
            orderedSet(entries), post_id,
            '%s - %s' % (uploader, title) if uploader else title,
            description)