]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/vk.py
Fix control characters being printed to `--console-title`
[yt-dlp.git] / yt_dlp / extractor / vk.py
CommitLineData
dcdb292f 1# coding: utf-8
94a23d2a
PH
2from __future__ import unicode_literals
3
51815886 4import collections
0e6ec3ca 5import functools
60d142aa 6import re
60d142aa
JMF
7
8from .common import InfoExtractor
059cd768 9from ..compat import compat_urlparse
60d142aa 10from ..utils import (
2d19fb50 11 clean_html,
9032dc28 12 ExtractorError,
2d19fb50 13 get_element_by_class,
bf4b3b6b 14 int_or_none,
0e6ec3ca 15 OnDemandPagedList,
1cc79574 16 orderedSet,
ad1bc71a 17 str_or_none,
8117df4c 18 str_to_int,
60d142aa 19 unescapeHTML,
a7ee8a00 20 unified_timestamp,
3052a30d 21 url_or_none,
6e6bc8da 22 urlencode_postdata,
1cc79574 23)
e3845525 24from .dailymotion import DailymotionIE
3c989818 25from .odnoklassniki import OdnoklassnikiIE
c4737bea 26from .pladform import PladformIE
e3845525 27from .vimeo import VimeoIE
5113b691 28from .youtube import YoutubeIE
60d142aa
JMF
29
30
2d19fb50
S
31class VKBaseIE(InfoExtractor):
32 _NETRC_MACHINE = 'vk'
33
def _login(self):
    """Log in to vk.com when credentials are available.

    Does nothing if ``self._get_login_info()`` yields no username.
    Otherwise the hidden inputs of the vk.com front page are submitted to
    https://vk.com/login together with the cp1251-encoded credentials.

    Raises:
        ExtractorError: (expected) if the response contains the
            ``onLoginFailed`` marker.
    """
    username, password = self._get_login_info()
    if username is None:
        return

    front_page, front_handle = self._download_webpage_handle(
        'https://vk.com', None, 'Downloading login page')

    form = self._hidden_inputs(front_page)
    form['email'] = username.encode('cp1251')
    form['pass'] = password.encode('cp1251')

    # vk serves two same remixlhk cookies in Set-Cookie header and expects
    # first one to be actually set
    self._apply_first_set_cookie_header(front_handle, 'remixlhk')

    response_page = self._download_webpage(
        'https://vk.com/login', None,
        note='Logging in',
        data=urlencode_postdata(form))

    login_failed = re.search(r'onLoginFailed', response_page) is not None
    if login_failed:
        raise ExtractorError(
            'Unable to login, incorrect username and/or password', expected=True)
61
def _real_initialize(self):
    """Initialization hook: delegates to ``_login()`` and returns nothing."""
    self._login()
64
3c989818
RA
def _download_payload(self, path, video_id, data, fatal=True):
    """POST an XMLHttpRequest-style form to https://vk.com/<path>.php.

    Args:
        path: script name without the ``.php`` suffix (e.g. ``'al_video'``).
        video_id: identifier passed through to ``_download_json``.
        data: mapping of form fields; it is copied, never modified.
        fatal: forwarded to ``_download_json``.

    Returns:
        The second element of the response's ``payload`` pair, or ``None``
        when ``fatal`` is false and the download produced no response.

    Raises:
        ExtractorError: (expected) when the response code is ``'8'``; the
            message is taken from the payload.
        Whatever ``raise_login_required()`` raises when the code is ``'3'``.
    """
    # Copy first so adding the 'al' flag does not leak into the caller's dict.
    form = dict(data)
    form['al'] = 1

    response = self._download_json(
        'https://vk.com/%s.php' % path, video_id,
        data=urlencode_postdata(form), fatal=fatal,
        headers={'X-Requested-With': 'XMLHttpRequest'})

    # A non-fatal failed download yields a falsy value instead of a dict;
    # subscripting it would crash and defeat the purpose of fatal=False.
    if not fatal and not response:
        return None

    code, payload = response['payload']
    if code == '3':
        self.raise_login_required()
    elif code == '8':
        # payload[0] holds the message wrapped in one extra character on
        # each side, which is stripped before cleaning the HTML.
        raise ExtractorError(clean_html(payload[0][1:-1]), expected=True)
    return payload
76
2d19fb50
S
77
78class VKIE(VKBaseIE):
1ecb5d1d
S
79 IE_NAME = 'vk'
80 IE_DESC = 'VK'
cf9cf7dd
S
81 _VALID_URL = r'''(?x)
82 https?://
83 (?:
04e88ca2 84 (?:
bdafd88d 85 (?:(?:m|new)\.)?vk\.com/video_|
04e88ca2 86 (?:www\.)?daxab.com/
87 )
88 ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)|
cf9cf7dd 89 (?:
bdafd88d 90 (?:(?:m|new)\.)?vk\.com/(?:.+?\?.*?z=)?video|
04e88ca2 91 (?:www\.)?daxab.com/embed/
cf9cf7dd 92 )
04e88ca2 93 (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>[\da-f]+))?
cf9cf7dd
S
94 )
95 '''
9032dc28
S
96 _TESTS = [
97 {
98 'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
09f934b0 99 'md5': '7babad3b85ea2e91948005b1b8b0cb84',
9032dc28 100 'info_dict': {
220828f2 101 'id': '-77521_162222515',
09f934b0 102 'ext': 'mp4',
9032dc28 103 'title': 'ProtivoGunz - Хуёвая песня',
36300346 104 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
ad1bc71a 105 'uploader_id': '-77521',
9032dc28 106 'duration': 195,
ad1bc71a 107 'timestamp': 1329049880,
42e1ff86 108 'upload_date': '20120212',
9032dc28 109 },
60d142aa 110 },
9032dc28 111 {
c52331f3 112 'url': 'http://vk.com/video205387401_165548505',
9032dc28 113 'info_dict': {
220828f2 114 'id': '205387401_165548505',
9032dc28 115 'ext': 'mp4',
c52331f3 116 'title': 'No name',
ad1bc71a
RA
117 'uploader': 'Tom Cruise',
118 'uploader_id': '205387401',
c52331f3 119 'duration': 9,
ad1bc71a
RA
120 'timestamp': 1374364108,
121 'upload_date': '20130720',
9032dc28
S
122 }
123 },
ca97a56e
S
124 {
125 'note': 'Embedded video',
3c989818
RA
126 'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa',
127 'md5': '7babad3b85ea2e91948005b1b8b0cb84',
ca97a56e 128 'info_dict': {
3c989818 129 'id': '-77521_162222515',
ca97a56e 130 'ext': 'mp4',
3c989818
RA
131 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
132 'title': 'ProtivoGunz - Хуёвая песня',
133 'duration': 195,
134 'upload_date': '20120212',
135 'timestamp': 1329049880,
136 'uploader_id': '-77521',
04e88ca2 137 },
ca97a56e 138 },
9032dc28 139 {
c52331f3
WS
140 # VIDEO NOW REMOVED
141 # please update if you find a video whose URL follows the same pattern
9032dc28
S
142 'url': 'http://vk.com/video-8871596_164049491',
143 'md5': 'a590bcaf3d543576c9bd162812387666',
144 'note': 'Only available for registered users',
145 'info_dict': {
220828f2 146 'id': '-8871596_164049491',
9032dc28
S
147 'ext': 'mp4',
148 'uploader': 'Триллеры',
57bdc730 149 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
9032dc28 150 'duration': 8352,
8117df4c
S
151 'upload_date': '20121218',
152 'view_count': int,
9032dc28 153 },
3c989818 154 'skip': 'Removed',
ca97a56e 155 },
57bdc730
S
156 {
157 'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d',
57bdc730 158 'info_dict': {
220828f2 159 'id': '-43215063_168067957',
57bdc730 160 'ext': 'mp4',
3c989818 161 'uploader': 'Bro Mazter',
57bdc730
S
162 'title': ' ',
163 'duration': 7291,
42e1ff86 164 'upload_date': '20140328',
3c989818
RA
165 'uploader_id': '223413403',
166 'timestamp': 1396018030,
57bdc730
S
167 },
168 'skip': 'Requires vk account credentials',
169 },
849086a1
S
170 {
171 'url': 'http://m.vk.com/video-43215063_169084319?list=125c627d1aa1cebb83&from=wall-43215063_2566540',
172 'md5': '0c45586baa71b7cb1d0784ee3f4e00a6',
173 'note': 'ivi.ru embed',
174 'info_dict': {
220828f2 175 'id': '-43215063_169084319',
849086a1
S
176 'ext': 'mp4',
177 'title': 'Книга Илая',
178 'duration': 6771,
42e1ff86 179 'upload_date': '20140626',
8117df4c 180 'view_count': int,
849086a1 181 },
3c989818 182 'skip': 'Removed',
849086a1 183 },
79913fde
S
184 {
185 # video (removed?) only available with list id
186 'url': 'https://vk.com/video30481095_171201961?list=8764ae2d21f14088d4',
187 'md5': '091287af5402239a1051c37ec7b92913',
188 'info_dict': {
220828f2 189 'id': '30481095_171201961',
79913fde
S
190 'ext': 'mp4',
191 'title': 'ТюменцевВВ_09.07.2015',
192 'uploader': 'Anton Ivanov',
193 'duration': 109,
194 'upload_date': '20150709',
195 'view_count': int,
196 },
a7ee8a00 197 'skip': 'Removed',
79913fde 198 },
9281f6d2
S
199 {
200 # youtube embed
201 'url': 'https://vk.com/video276849682_170681728',
202 'info_dict': {
203 'id': 'V3K4mi0SYkc',
220828f2 204 'ext': 'mp4',
9281f6d2 205 'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
ad1bc71a 206 'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
220828f2 207 'duration': 178,
9281f6d2 208 'upload_date': '20130116',
ad1bc71a 209 'uploader': "Children's Joy Foundation Inc.",
9281f6d2
S
210 'uploader_id': 'thecjf',
211 'view_count': int,
212 },
213 },
e3845525
KM
214 {
215 # dailymotion embed
216 'url': 'https://vk.com/video-37468416_456239855',
217 'info_dict': {
218 'id': 'k3lz2cmXyRuJQSjGHUv',
219 'ext': 'mp4',
220 'title': 'md5:d52606645c20b0ddbb21655adaa4f56f',
5ef62fc4 221 'description': 'md5:424b8e88cc873217f520e582ba28bb36',
e3845525
KM
222 'uploader': 'AniLibria.Tv',
223 'upload_date': '20160914',
224 'uploader_id': 'x1p5vl5',
225 'timestamp': 1473877246,
226 },
227 'params': {
228 'skip_download': True,
93aa0b63 229 },
e3845525 230 },
bf4b3b6b
S
231 {
232 # video key is extra_data not url\d+
233 'url': 'http://vk.com/video-110305615_171782105',
234 'md5': 'e13fcda136f99764872e739d13fac1d1',
235 'info_dict': {
220828f2 236 'id': '-110305615_171782105',
bf4b3b6b
S
237 'ext': 'mp4',
238 'title': 'S-Dance, репетиции к The way show',
239 'uploader': 'THE WAY SHOW | 17 апреля',
ad1bc71a
RA
240 'uploader_id': '-110305615',
241 'timestamp': 1454859345,
bf4b3b6b 242 'upload_date': '20160207',
ad1bc71a
RA
243 },
244 'params': {
245 'skip_download': True,
bf4b3b6b
S
246 },
247 },
93aa0b63 248 {
424ed37e 249 # finished live stream, postlive_mp4
93aa0b63 250 'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
93aa0b63 251 'info_dict': {
220828f2 252 'id': '-387766_456242764',
93aa0b63 253 'ext': 'mp4',
220828f2 254 'title': 'ИгроМир 2016 День 1 — Игромания Утром',
93aa0b63
S
255 'uploader': 'Игромания',
256 'duration': 5239,
220828f2
RA
257 # TODO: use act=show to extract view_count
258 # 'view_count': int,
259 'upload_date': '20160929',
260 'uploader_id': '-387766',
261 'timestamp': 1475137527,
93aa0b63 262 },
3c989818
RA
263 'params': {
264 'skip_download': True,
265 },
93aa0b63 266 },
475f8a45 267 {
424ed37e 268 # live stream, hls and rtmp links, most likely already finished live
475f8a45
S
269 # stream by the time you are reading this comment
270 'url': 'https://vk.com/video-140332_456239111',
271 'only_matching': True,
272 },
a8363f3a
PH
273 {
274 # removed video, just testing that we match the pattern
275 'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
276 'only_matching': True,
277 },
e58066e2
S
278 {
279 # age restricted video, requires vk account credentials
280 'url': 'https://vk.com/video205387401_164765225',
281 'only_matching': True,
282 },
a5e52a1f
S
283 {
284 # pladform embed
285 'url': 'https://vk.com/video-76116461_171554880',
286 'only_matching': True,
bdafd88d
S
287 },
288 {
289 'url': 'http://new.vk.com/video205387401_165548505',
290 'only_matching': True,
643dc0fc
CP
291 },
292 {
293 # This video is no longer available, because its author has been blocked.
294 'url': 'https://vk.com/video-10639516_456240611',
295 'only_matching': True,
a640c4d2 296 },
297 {
298 # The video is not available in your region.
299 'url': 'https://vk.com/video-51812607_171445436',
300 'only_matching': True,
301 }]
9032dc28 302
@staticmethod
def _extract_sibnet_urls(webpage):
    """Return the HTML-unescaped ``src`` of every sibnet player iframe in *webpage*.

    The result is a list in document order; it is empty when no iframe
    matches.
    """
    # https://help.sibnet.ru/?sibnet_video_embed
    found = []
    for iframe in re.finditer(
            r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1',
            webpage):
        found.append(unescapeHTML(iframe.group('url')))
    return found
309
def _real_extract(self, url):
    """Extract one VK video, or delegate to an embedded third-party player.

    Two URL shapes are handled, depending on which groups ``_VALID_URL``
    matched:

    * ``videoid`` matched: the data comes from the ``al_video`` AJAX
      endpoint (``act=show_inline``), optionally with a ``list`` id.
    * otherwise (``video_ext.php`` embed): the embed page is downloaded,
      checked for known error messages, and ``playerParams`` is parsed
      from it.

    If the page embeds a player from another site, a ``url_result`` for
    that player is returned instead of an info dict.

    Raises:
        ExtractorError: for the known error pages of the embed view, and
            when no player params can be found.
    """
    mobj = self._match_valid_url(url)
    video_id = mobj.group('videoid')

    mv_data = {}
    if video_id:
        query = {
            'act': 'show_inline',
            'video': video_id,
        }
        # Some videos (removed?) can only be downloaded with list id specified
        list_id = mobj.group('list_id')
        if list_id:
            query['list'] = list_id

        payload = self._download_payload('al_video', video_id, query)
        info_page = payload[1]
        opts = payload[-1]
        mv_data = opts.get('mvData') or {}
        player = opts.get('player') or {}
    else:
        video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))

        info_page = self._download_webpage(
            'http://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id)

        self._vk_raise_on_embed_error(info_page, video_id)

        player = self._parse_json(self._search_regex(
            r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n',
            info_page, 'player params'), video_id)

    embed_result = self._vk_embed_result(url, info_page)
    if embed_result is not None:
        return embed_result

    # Fail with a readable message instead of a bare KeyError/IndexError
    # when the response carries no player parameters at all.
    params = player.get('params')
    if not params:
        raise ExtractorError(
            'Unable to extract player params for video %s' % video_id)
    data = params[0]
    title = unescapeHTML(data['md_title'])

    # 2 = live
    # 3 = post live (finished live)
    is_live = data.get('live') == 2
    if is_live:
        title = self._live_title(title)

    # Prefer the date shown on the page; fall back to the player's 'date'.
    timestamp = unified_timestamp(self._html_search_regex(
        r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
        'upload date', default=None)) or int_or_none(data.get('date'))

    view_count = str_to_int(self._search_regex(
        r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
        info_page, 'view count', default=None))

    formats = self._vk_formats(data, video_id, is_live)
    self._sort_formats(formats)

    return {
        'id': video_id,
        'formats': formats,
        'title': title,
        'thumbnail': data.get('jpg'),
        'uploader': data.get('md_author'),
        'uploader_id': str_or_none(data.get('author_id') or mv_data.get('authorId')),
        'duration': int_or_none(data.get('duration') or mv_data.get('duration')),
        'timestamp': timestamp,
        'view_count': view_count,
        'like_count': int_or_none(mv_data.get('likes')),
        'comment_count': int_or_none(mv_data.get('commcount')),
        'is_live': is_live,
        'subtitles': self._vk_subtitles(data),
    }


def _vk_raise_on_embed_error(self, info_page, video_id):
    """Raise an expected ExtractorError if the embed page shows a known error."""
    error_message = self._html_search_regex(
        [r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
         r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'],
        info_page, 'error message', default=None)
    if error_message:
        raise ExtractorError(error_message, expected=True)

    if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
        raise ExtractorError(
            'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
            expected=True)

    ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.'

    # Maps a regex matched against the page to the message template that
    # is reported (formatted with the video id) when it matches.
    ERRORS = {
        r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
        ERROR_COPYRIGHT,

        r'>The video .*? was removed from public access by request of the copyright holder.<':
        ERROR_COPYRIGHT,

        r'<!>Please log in or <':
        'Video %s is only available for registered users, '
        'use --username and --password options to provide account credentials.',

        r'<!>Unknown error':
        'Video %s does not exist.',

        r'<!>Видео временно недоступно':
        'Video %s is temporarily unavailable.',

        r'<!>Access denied':
        'Access denied to video %s.',

        r'<!>Видеозапись недоступна, так как её автор был заблокирован.':
        'Video %s is no longer available, because its author has been blocked.',

        r'<!>This video is no longer available, because its author has been blocked.':
        'Video %s is no longer available, because its author has been blocked.',

        r'<!>This video is no longer available, because it has been deleted.':
        'Video %s is no longer available, because it has been deleted.',

        r'<!>The video .+? is not available in your region.':
        'Video %s is not available in your region.',
    }

    for error_re, error_msg in ERRORS.items():
        if re.search(error_re, info_page):
            raise ExtractorError(error_msg % video_id, expected=True)


def _vk_embed_result(self, url, info_page):
    """Return a url_result for the first third-party embed found, else None.

    The checks run in a fixed order: YouTube, Vimeo, Pladform, Rutube,
    Dailymotion, Odnoklassniki, sibnet, and finally a generic ``url`` key
    inside a ``var opts = {...}`` script object.
    """
    youtube_url = YoutubeIE._extract_url(info_page)
    if youtube_url:
        return self.url_result(youtube_url, YoutubeIE.ie_key())

    vimeo_url = VimeoIE._extract_url(url, info_page)
    if vimeo_url is not None:
        return self.url_result(vimeo_url, VimeoIE.ie_key())

    pladform_url = PladformIE._extract_url(info_page)
    if pladform_url:
        return self.url_result(pladform_url, PladformIE.ie_key())

    m_rutube = re.search(
        r'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page)
    if m_rutube is not None:
        # The URL may be backslash-escaped inside the page; strip those.
        rutube_url = self._proto_relative_url(
            m_rutube.group(1).replace('\\', ''))
        return self.url_result(rutube_url)

    dailymotion_urls = DailymotionIE._extract_urls(info_page)
    if dailymotion_urls:
        return self.url_result(dailymotion_urls[0], DailymotionIE.ie_key())

    odnoklassniki_url = OdnoklassnikiIE._extract_url(info_page)
    if odnoklassniki_url:
        return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())

    sibnet_urls = self._extract_sibnet_urls(info_page)
    if sibnet_urls:
        return self.url_result(sibnet_urls[0])

    m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
    if m_opts:
        m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))
        if m_opts_url:
            opts_url = m_opts_url.group(1)
            if opts_url.startswith('//'):
                opts_url = 'http:' + opts_url
            return self.url_result(opts_url)

    return None


def _vk_formats(self, data, video_id, is_live):
    """Build the (unsorted) format list from the player params dict *data*.

    Only values that are URLs starting with ``http``, ``//`` or ``rtmp``
    are considered; the key decides how the URL is interpreted.
    """
    formats = []
    for format_id, format_url in data.items():
        format_url = url_or_none(format_url)
        if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
            continue
        if (format_id.startswith(('url', 'cache'))
                or format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
            # Keys such as url720 / cache480 carry the height as a suffix.
            height = int_or_none(self._search_regex(
                r'^(?:url|cache)(\d+)', format_id, 'height', default=None))
            formats.append({
                'format_id': format_id,
                'url': format_url,
                'height': height,
            })
        elif format_id == 'hls':
            formats.extend(self._extract_m3u8_formats(
                format_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id=format_id, fatal=False, live=is_live))
        elif format_id == 'rtmp':
            formats.append({
                'format_id': format_id,
                'url': format_url,
                'ext': 'flv',
            })
    return formats


def _vk_subtitles(self, data):
    """Collect subtitles from ``data['subs']``, grouped by language.

    Entries whose URL is missing or invalid are skipped, so no subtitle
    with ``'url': None`` is ever emitted.
    """
    subtitles = {}
    for sub in data.get('subs') or {}:
        sub_url = url_or_none(sub.get('url'))
        if not sub_url:
            continue
        subtitles.setdefault(sub.get('lang', 'en'), []).append({
            # The extension is whatever follows the last dot of the title.
            'ext': sub.get('title', '.srt').split('.')[-1],
            'url': sub_url,
        })
    return subtitles
469d4c89
WS
496
497
2d19fb50 498class VKUserVideosIE(VKBaseIE):
1ecb5d1d
S
499 IE_NAME = 'vk:uservideos'
500 IE_DESC = "VK - User's Videos"
0e6ec3ca 501 _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&](?:.*?\bsection=(?P<section>\w+))?|$)'
469d4c89 502 _TEMPLATE_URL = 'https://vk.com/videos'
dc786d3d 503 _TESTS = [{
0e6ec3ca
RA
504 'url': 'https://vk.com/videos-767561',
505 'info_dict': {
506 'id': '-767561_all',
507 },
508 'playlist_mincount': 1150,
509 }, {
510 'url': 'https://vk.com/videos-767561?section=uploaded',
15ec6693 511 'info_dict': {
0e6ec3ca 512 'id': '-767561_uploaded',
15ec6693 513 },
0e6ec3ca
RA
514 'playlist_mincount': 425,
515 }, {
516 'url': 'http://vk.com/videos205387401',
517 'only_matching': True,
dc786d3d
S
518 }, {
519 'url': 'http://vk.com/videos-77521',
520 'only_matching': True,
0436157b
S
521 }, {
522 'url': 'http://vk.com/videos-97664626?section=all',
523 'only_matching': True,
bdafd88d
S
524 }, {
525 'url': 'http://m.vk.com/videos205387401',
526 'only_matching': True,
527 }, {
528 'url': 'http://new.vk.com/videos205387401',
529 'only_matching': True,
dc786d3d 530 }]
0e6ec3ca
RA
531 _PAGE_SIZE = 1000
532 _VIDEO = collections.namedtuple('Video', ['owner_id', 'id'])
dc786d3d 533
def _fetch_page(self, page_id, section, page):
    """Yield one ``url_result`` per video on page number *page* of a listing.

    The listing is requested from the ``al_video`` endpoint
    (``act=load_videos_silent``) at offset ``page * _PAGE_SIZE``. The
    first two fields of every returned row are used as owner id and
    video id.
    """
    query = {
        'act': 'load_videos_silent',
        'offset': page * self._PAGE_SIZE,
        'oid': page_id,
        'section': section,
    }
    rows = self._download_payload('al_video', page_id, query)[0][section]['list']

    for row in rows:
        video = self._VIDEO._make(row[:2])
        video_id = '%d_%d' % (video.owner_id, video.id)
        yield self.url_result(
            'http://vk.com/video' + video_id, VKIE.ie_key(), video_id)
547
def _real_extract(self, url):
    """Return a lazily paged playlist of the videos of one owner/section.

    The section defaults to ``'all'`` when the URL does not name one; the
    playlist id is ``'<page_id>_<section>'``.
    """
    page_id, section = self._match_valid_url(url).groups()
    section = section or 'all'

    fetch = functools.partial(self._fetch_page, page_id, section)
    entries = OnDemandPagedList(fetch, self._PAGE_SIZE)

    playlist_id = '%s_%s' % (page_id, section)
    return self.playlist_result(entries, playlist_id)
2d19fb50
S
558
559
560class VKWallPostIE(VKBaseIE):
561 IE_NAME = 'vk:wallpost'
562 _VALID_URL = r'https?://(?:(?:(?:(?:m|new)\.)?vk\.com/(?:[^?]+\?.*\bw=)?wall(?P<id>-?\d+_\d+)))'
563 _TESTS = [{
564 # public page URL, audio playlist
565 'url': 'https://vk.com/bs.official?w=wall-23538238_35',
566 'info_dict': {
3c989818
RA
567 'id': '-23538238_35',
568 'title': 'Black Shadow - Wall post -23538238_35',
2d19fb50
S
569 'description': 'md5:3f84b9c4f9ef499731cf1ced9998cc0c',
570 },
571 'playlist': [{
572 'md5': '5ba93864ec5b85f7ce19a9af4af080f6',
573 'info_dict': {
574 'id': '135220665_111806521',
3c989818 575 'ext': 'mp4',
2d19fb50
S
576 'title': 'Black Shadow - Слепое Верование',
577 'duration': 370,
578 'uploader': 'Black Shadow',
579 'artist': 'Black Shadow',
580 'track': 'Слепое Верование',
581 },
582 }, {
583 'md5': '4cc7e804579122b17ea95af7834c9233',
584 'info_dict': {
585 'id': '135220665_111802303',
3c989818 586 'ext': 'mp4',
2d19fb50
S
587 'title': 'Black Shadow - Война - Негасимое Бездны Пламя!',
588 'duration': 423,
589 'uploader': 'Black Shadow',
590 'artist': 'Black Shadow',
591 'track': 'Война - Негасимое Бездны Пламя!',
592 },
2d19fb50 593 }],
51815886 594 'params': {
3c989818 595 'skip_download': True,
51815886
S
596 'usenetrc': True,
597 },
2d19fb50
S
598 'skip': 'Requires vk account credentials',
599 }, {
600 # single YouTube embed, no leading -
601 'url': 'https://vk.com/wall85155021_6319',
602 'info_dict': {
603 'id': '85155021_6319',
3c989818 604 'title': 'Сергей Горбунов - Wall post 85155021_6319',
2d19fb50
S
605 },
606 'playlist_count': 1,
51815886
S
607 'params': {
608 'usenetrc': True,
609 },
2d19fb50
S
610 'skip': 'Requires vk account credentials',
611 }, {
612 # wall page URL
613 'url': 'https://vk.com/wall-23538238_35',
614 'only_matching': True,
615 }, {
616 # mobile wall page URL
617 'url': 'https://m.vk.com/wall-23538238_35',
618 'only_matching': True,
619 }]
3c989818 620 _BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/='
0e6ec3ca 621 _AUDIO = collections.namedtuple('Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads'])
3c989818
RA
622
def _decode(self, enc):
    """Decode *enc* as base64-style text over the ``_BASE64_CHARS`` alphabet.

    Characters are processed in groups of four 6-bit values; every
    character except the first of a group emits one output character
    (code point 0-255). A character that is not in the alphabet raises
    ``ValueError`` (from ``str.index``).
    """
    alphabet = self._BASE64_CHARS
    decoded = []
    acc = 0
    for pos, char in enumerate(enc):
        value = alphabet.index(char)
        if pos % 4:
            acc = 64 * acc + value
            # Shift is 4, 2, 0 for the 2nd, 3rd, 4th character of a group.
            decoded.append(chr(255 & acc >> (-2 * (pos + 1) & 6)))
        else:
            # First character of a group restarts the accumulator.
            acc = value
    return ''.join(decoded)
634
def _unmask_url(self, mask_url, vk_id):
    """Return the real URL hidden in an ``audio_api_unavailable`` link.

    URLs that do not contain the ``audio_api_unavailable`` marker are
    returned unchanged. Otherwise the ``extra`` query value is split at
    ``#``: the first part decodes to the shuffled URL characters, the
    second to ``<func>\\x0b<base>``. ``base`` XOR *vk_id* seeds the index
    sequence used to swap the characters back into place.
    """
    if 'audio_api_unavailable' not in mask_url:
        return mask_url

    extra = mask_url.split('?extra=')[1].split('#')
    # Only the numeric part is used; the function name is ignored.
    _, base = self._decode(extra[1]).split(chr(11))
    chars = list(self._decode(extra[0]))
    size = len(chars)

    # Build the swap targets from the last position down to the first.
    targets = [None] * size
    cursor = int(base) ^ vk_id
    for n in reversed(range(size)):
        cursor = (size * (n + 1) ^ cursor + n) % size
        targets[n] = cursor

    # Apply the swaps, reading the targets in reverse order.
    for n in range(1, size):
        other = targets[size - 1 - n]
        chars[n], chars[other] = chars[other], chars[n]

    return ''.join(chars)
2d19fb50
S
653
def _real_extract(self, url):
    """Return a playlist of the audio tracks and videos attached to a wall post.

    The post markup is fetched through the ``wkview`` endpoint
    (``act=show``). Audio entries come from ``data-audio`` attributes,
    video entries from links whose href starts with ``/video``; the
    latter are delegated to ``VKIE``.
    """
    post_id = self._match_id(url)

    query = {
        'act': 'show',
        'w': 'wall' + post_id,
    }
    webpage = self._download_payload('wkview', post_id, query)[1]

    description = clean_html(get_element_by_class('wall_post_text', webpage))
    uploader = clean_html(get_element_by_class('author', webpage))

    entries = []

    for raw_audio in re.findall(r'data-audio="([^"]+)', webpage):
        fields = self._parse_json(unescapeHTML(raw_audio), post_id)
        track = self._AUDIO._make(fields[:16])
        # Tracks without a URL cannot be downloaded; skip them.
        if not track.url:
            continue

        track_title = unescapeHTML(track.title)
        performer = unescapeHTML(track.performer)
        track_url = self._unmask_url(track.url, track.ads['vk_id'])

        if performer:
            full_title = '%s - %s' % (performer, track_title)
        else:
            full_title = track_title

        if track.cover_url:
            thumbnails = [{'url': c_url} for c_url in track.cover_url.split(',')]
        else:
            thumbnails = None

        entries.append({
            'id': '%s_%s' % (track.owner_id, track.id),
            'url': track_url,
            'title': full_title,
            'thumbnails': thumbnails,
            'duration': int_or_none(track.duration),
            'uploader': uploader,
            'artist': performer,
            'track': track_title,
            'ext': 'mp4',
            'protocol': 'm3u8',
        })

    entries.extend(
        self.url_result(
            compat_urlparse.urljoin(url, link.group('url')), VKIE.ie_key())
        for link in re.finditer(
            r'<a[^>]+href=(["\'])(?P<url>/video(?:-?[\d_]+).*?)\1', webpage))

    title = 'Wall post %s' % post_id
    if uploader:
        playlist_title = '%s - %s' % (uploader, title)
    else:
        playlist_title = title

    return self.playlist_result(
        orderedSet(entries), post_id, playlist_title, description)