]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/vk.py
[extractor] Standardize `_live_title`
[yt-dlp.git] / yt_dlp / extractor / vk.py
CommitLineData
dcdb292f 1# coding: utf-8
94a23d2a
PH
2from __future__ import unicode_literals
3
51815886 4import collections
0e6ec3ca 5import functools
60d142aa 6import re
60d142aa
JMF
7
8from .common import InfoExtractor
059cd768 9from ..compat import compat_urlparse
60d142aa 10from ..utils import (
2d19fb50 11 clean_html,
9032dc28 12 ExtractorError,
2d19fb50 13 get_element_by_class,
bf4b3b6b 14 int_or_none,
0e6ec3ca 15 OnDemandPagedList,
1cc79574 16 orderedSet,
ad1bc71a 17 str_or_none,
8117df4c 18 str_to_int,
60d142aa 19 unescapeHTML,
a7ee8a00 20 unified_timestamp,
3052a30d 21 url_or_none,
6e6bc8da 22 urlencode_postdata,
1cc79574 23)
e3845525 24from .dailymotion import DailymotionIE
3c989818 25from .odnoklassniki import OdnoklassnikiIE
c4737bea 26from .pladform import PladformIE
e3845525 27from .vimeo import VimeoIE
5113b691 28from .youtube import YoutubeIE
60d142aa
JMF
29
30
2d19fb50
S
31class VKBaseIE(InfoExtractor):
32 _NETRC_MACHINE = 'vk'
33
34 def _login(self):
68217024 35 username, password = self._get_login_info()
2d19fb50
S
36 if username is None:
37 return
38
39 login_page, url_handle = self._download_webpage_handle(
40 'https://vk.com', None, 'Downloading login page')
41
42 login_form = self._hidden_inputs(login_page)
43
44 login_form.update({
45 'email': username.encode('cp1251'),
46 'pass': password.encode('cp1251'),
47 })
48
e3c1266f
S
49 # vk serves two same remixlhk cookies in Set-Cookie header and expects
50 # first one to be actually set
51 self._apply_first_set_cookie_header(url_handle, 'remixlhk')
2d19fb50
S
52
53 login_page = self._download_webpage(
f0ffaa16 54 'https://vk.com/login', None,
e4d95865 55 note='Logging in',
2d19fb50
S
56 data=urlencode_postdata(login_form))
57
58 if re.search(r'onLoginFailed', login_page):
59 raise ExtractorError(
60 'Unable to login, incorrect username and/or password', expected=True)
61
62 def _real_initialize(self):
63 self._login()
64
3c989818
RA
65 def _download_payload(self, path, video_id, data, fatal=True):
66 data['al'] = 1
67 code, payload = self._download_json(
68 'https://vk.com/%s.php' % path, video_id,
69 data=urlencode_postdata(data), fatal=fatal,
70 headers={'X-Requested-With': 'XMLHttpRequest'})['payload']
71 if code == '3':
72 self.raise_login_required()
73 elif code == '8':
74 raise ExtractorError(clean_html(payload[0][1:-1]), expected=True)
75 return payload
76
2d19fb50
S
77
78class VKIE(VKBaseIE):
1ecb5d1d
S
79 IE_NAME = 'vk'
80 IE_DESC = 'VK'
cf9cf7dd
S
81 _VALID_URL = r'''(?x)
82 https?://
83 (?:
04e88ca2 84 (?:
bdafd88d 85 (?:(?:m|new)\.)?vk\.com/video_|
04e88ca2 86 (?:www\.)?daxab.com/
87 )
88 ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)|
cf9cf7dd 89 (?:
bdafd88d 90 (?:(?:m|new)\.)?vk\.com/(?:.+?\?.*?z=)?video|
04e88ca2 91 (?:www\.)?daxab.com/embed/
cf9cf7dd 92 )
04e88ca2 93 (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>[\da-f]+))?
cf9cf7dd
S
94 )
95 '''
9032dc28
S
96 _TESTS = [
97 {
98 'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
09f934b0 99 'md5': '7babad3b85ea2e91948005b1b8b0cb84',
9032dc28 100 'info_dict': {
220828f2 101 'id': '-77521_162222515',
09f934b0 102 'ext': 'mp4',
9032dc28 103 'title': 'ProtivoGunz - Хуёвая песня',
36300346 104 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
ad1bc71a 105 'uploader_id': '-77521',
9032dc28 106 'duration': 195,
ad1bc71a 107 'timestamp': 1329049880,
42e1ff86 108 'upload_date': '20120212',
9032dc28 109 },
60d142aa 110 },
9032dc28 111 {
c52331f3 112 'url': 'http://vk.com/video205387401_165548505',
9032dc28 113 'info_dict': {
220828f2 114 'id': '205387401_165548505',
9032dc28 115 'ext': 'mp4',
c52331f3 116 'title': 'No name',
ad1bc71a
RA
117 'uploader': 'Tom Cruise',
118 'uploader_id': '205387401',
c52331f3 119 'duration': 9,
ad1bc71a
RA
120 'timestamp': 1374364108,
121 'upload_date': '20130720',
9032dc28
S
122 }
123 },
ca97a56e
S
124 {
125 'note': 'Embedded video',
3c989818
RA
126 'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa',
127 'md5': '7babad3b85ea2e91948005b1b8b0cb84',
ca97a56e 128 'info_dict': {
3c989818 129 'id': '-77521_162222515',
ca97a56e 130 'ext': 'mp4',
3c989818
RA
131 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
132 'title': 'ProtivoGunz - Хуёвая песня',
133 'duration': 195,
134 'upload_date': '20120212',
135 'timestamp': 1329049880,
136 'uploader_id': '-77521',
04e88ca2 137 },
ca97a56e 138 },
9032dc28 139 {
c52331f3
WS
140 # VIDEO NOW REMOVED
141 # please update if you find a video whose URL follows the same pattern
9032dc28
S
142 'url': 'http://vk.com/video-8871596_164049491',
143 'md5': 'a590bcaf3d543576c9bd162812387666',
144 'note': 'Only available for registered users',
145 'info_dict': {
220828f2 146 'id': '-8871596_164049491',
9032dc28
S
147 'ext': 'mp4',
148 'uploader': 'Триллеры',
57bdc730 149 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
9032dc28 150 'duration': 8352,
8117df4c
S
151 'upload_date': '20121218',
152 'view_count': int,
9032dc28 153 },
3c989818 154 'skip': 'Removed',
ca97a56e 155 },
57bdc730
S
156 {
157 'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d',
57bdc730 158 'info_dict': {
220828f2 159 'id': '-43215063_168067957',
57bdc730 160 'ext': 'mp4',
3c989818 161 'uploader': 'Bro Mazter',
57bdc730
S
162 'title': ' ',
163 'duration': 7291,
42e1ff86 164 'upload_date': '20140328',
3c989818
RA
165 'uploader_id': '223413403',
166 'timestamp': 1396018030,
57bdc730
S
167 },
168 'skip': 'Requires vk account credentials',
169 },
849086a1
S
170 {
171 'url': 'http://m.vk.com/video-43215063_169084319?list=125c627d1aa1cebb83&from=wall-43215063_2566540',
172 'md5': '0c45586baa71b7cb1d0784ee3f4e00a6',
173 'note': 'ivi.ru embed',
174 'info_dict': {
220828f2 175 'id': '-43215063_169084319',
849086a1
S
176 'ext': 'mp4',
177 'title': 'Книга Илая',
178 'duration': 6771,
42e1ff86 179 'upload_date': '20140626',
8117df4c 180 'view_count': int,
849086a1 181 },
3c989818 182 'skip': 'Removed',
849086a1 183 },
79913fde
S
184 {
185 # video (removed?) only available with list id
186 'url': 'https://vk.com/video30481095_171201961?list=8764ae2d21f14088d4',
187 'md5': '091287af5402239a1051c37ec7b92913',
188 'info_dict': {
220828f2 189 'id': '30481095_171201961',
79913fde
S
190 'ext': 'mp4',
191 'title': 'ТюменцевВВ_09.07.2015',
192 'uploader': 'Anton Ivanov',
193 'duration': 109,
194 'upload_date': '20150709',
195 'view_count': int,
196 },
a7ee8a00 197 'skip': 'Removed',
79913fde 198 },
9281f6d2
S
199 {
200 # youtube embed
201 'url': 'https://vk.com/video276849682_170681728',
202 'info_dict': {
203 'id': 'V3K4mi0SYkc',
220828f2 204 'ext': 'mp4',
9281f6d2 205 'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
ad1bc71a 206 'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
220828f2 207 'duration': 178,
9281f6d2 208 'upload_date': '20130116',
ad1bc71a 209 'uploader': "Children's Joy Foundation Inc.",
9281f6d2
S
210 'uploader_id': 'thecjf',
211 'view_count': int,
212 },
213 },
e3845525
KM
214 {
215 # dailymotion embed
216 'url': 'https://vk.com/video-37468416_456239855',
217 'info_dict': {
218 'id': 'k3lz2cmXyRuJQSjGHUv',
219 'ext': 'mp4',
220 'title': 'md5:d52606645c20b0ddbb21655adaa4f56f',
5ef62fc4 221 'description': 'md5:424b8e88cc873217f520e582ba28bb36',
e3845525
KM
222 'uploader': 'AniLibria.Tv',
223 'upload_date': '20160914',
224 'uploader_id': 'x1p5vl5',
225 'timestamp': 1473877246,
226 },
227 'params': {
228 'skip_download': True,
93aa0b63 229 },
e3845525 230 },
bf4b3b6b
S
231 {
232 # video key is extra_data not url\d+
233 'url': 'http://vk.com/video-110305615_171782105',
234 'md5': 'e13fcda136f99764872e739d13fac1d1',
235 'info_dict': {
220828f2 236 'id': '-110305615_171782105',
bf4b3b6b
S
237 'ext': 'mp4',
238 'title': 'S-Dance, репетиции к The way show',
239 'uploader': 'THE WAY SHOW | 17 апреля',
ad1bc71a
RA
240 'uploader_id': '-110305615',
241 'timestamp': 1454859345,
bf4b3b6b 242 'upload_date': '20160207',
ad1bc71a
RA
243 },
244 'params': {
245 'skip_download': True,
bf4b3b6b
S
246 },
247 },
93aa0b63 248 {
424ed37e 249 # finished live stream, postlive_mp4
93aa0b63 250 'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
93aa0b63 251 'info_dict': {
220828f2 252 'id': '-387766_456242764',
93aa0b63 253 'ext': 'mp4',
220828f2 254 'title': 'ИгроМир 2016 День 1 — Игромания Утром',
93aa0b63
S
255 'uploader': 'Игромания',
256 'duration': 5239,
220828f2
RA
257 # TODO: use act=show to extract view_count
258 # 'view_count': int,
259 'upload_date': '20160929',
260 'uploader_id': '-387766',
261 'timestamp': 1475137527,
93aa0b63 262 },
3c989818
RA
263 'params': {
264 'skip_download': True,
265 },
93aa0b63 266 },
475f8a45 267 {
424ed37e 268 # live stream, hls and rtmp links, most likely already finished live
475f8a45
S
269 # stream by the time you are reading this comment
270 'url': 'https://vk.com/video-140332_456239111',
271 'only_matching': True,
272 },
a8363f3a
PH
273 {
274 # removed video, just testing that we match the pattern
275 'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
276 'only_matching': True,
277 },
e58066e2
S
278 {
279 # age restricted video, requires vk account credentials
280 'url': 'https://vk.com/video205387401_164765225',
281 'only_matching': True,
282 },
a5e52a1f
S
283 {
284 # pladform embed
285 'url': 'https://vk.com/video-76116461_171554880',
286 'only_matching': True,
bdafd88d
S
287 },
288 {
289 'url': 'http://new.vk.com/video205387401_165548505',
290 'only_matching': True,
643dc0fc
CP
291 },
292 {
293 # This video is no longer available, because its author has been blocked.
294 'url': 'https://vk.com/video-10639516_456240611',
295 'only_matching': True,
a640c4d2 296 },
297 {
298 # The video is not available in your region.
299 'url': 'https://vk.com/video-51812607_171445436',
300 'only_matching': True,
301 }]
9032dc28 302
b73612a2 303 @staticmethod
304 def _extract_sibnet_urls(webpage):
305 # https://help.sibnet.ru/?sibnet_video_embed
306 return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
307 r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1',
308 webpage)]
309
60d142aa 310 def _real_extract(self, url):
5ad28e7f 311 mobj = self._match_valid_url(url)
ca97a56e
S
312 video_id = mobj.group('videoid')
313
3c989818 314 mv_data = {}
04e88ca2 315 if video_id:
3c989818
RA
316 data = {
317 'act': 'show_inline',
318 'video': video_id,
319 }
04e88ca2 320 # Some videos (removed?) can only be downloaded with list id specified
321 list_id = mobj.group('list_id')
322 if list_id:
3c989818
RA
323 data['list'] = list_id
324
325 payload = self._download_payload('al_video', video_id, data)
326 info_page = payload[1]
327 opts = payload[-1]
328 mv_data = opts.get('mvData') or {}
329 player = opts.get('player') or {}
04e88ca2 330 else:
ca97a56e 331 video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
9032dc28 332
3c989818
RA
333 info_page = self._download_webpage(
334 'http://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id)
9032dc28 335
3c989818
RA
336 error_message = self._html_search_regex(
337 [r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
338 r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'],
339 info_page, 'error message', default=None)
340 if error_message:
341 raise ExtractorError(error_message, expected=True)
ee48b6a8 342
3c989818
RA
343 if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
344 raise ExtractorError(
345 'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
346 expected=True)
7f220b2f 347
3c989818 348 ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.'
1d1d60f6 349
3c989818
RA
350 ERRORS = {
351 r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
352 ERROR_COPYRIGHT,
1d1d60f6 353
3c989818
RA
354 r'>The video .*? was removed from public access by request of the copyright holder.<':
355 ERROR_COPYRIGHT,
3d36cea4 356
3c989818
RA
357 r'<!>Please log in or <':
358 'Video %s is only available for registered users, '
359 'use --username and --password options to provide account credentials.',
3d36cea4 360
3c989818
RA
361 r'<!>Unknown error':
362 'Video %s does not exist.',
1aa5172f 363
3c989818
RA
364 r'<!>Видео временно недоступно':
365 'Video %s is temporarily unavailable.',
d919fa33 366
3c989818
RA
367 r'<!>Access denied':
368 'Access denied to video %s.',
643dc0fc 369
3c989818
RA
370 r'<!>Видеозапись недоступна, так как её автор был заблокирован.':
371 'Video %s is no longer available, because its author has been blocked.',
643dc0fc 372
3c989818
RA
373 r'<!>This video is no longer available, because its author has been blocked.':
374 'Video %s is no longer available, because its author has been blocked.',
ad1bc71a 375
3c989818
RA
376 r'<!>This video is no longer available, because it has been deleted.':
377 'Video %s is no longer available, because it has been deleted.',
a640c4d2 378
3c989818
RA
379 r'<!>The video .+? is not available in your region.':
380 'Video %s is not available in your region.',
381 }
382
383 for error_re, error_msg in ERRORS.items():
384 if re.search(error_re, info_page):
385 raise ExtractorError(error_msg % video_id, expected=True)
9032dc28 386
3c989818
RA
387 player = self._parse_json(self._search_regex(
388 r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n',
389 info_page, 'player params'), video_id)
9334f8f1 390
5113b691 391 youtube_url = YoutubeIE._extract_url(info_page)
46478456 392 if youtube_url:
3c989818 393 return self.url_result(youtube_url, YoutubeIE.ie_key())
849086a1 394
09b9c45e 395 vimeo_url = VimeoIE._extract_url(url, info_page)
84663361 396 if vimeo_url is not None:
3c989818 397 return self.url_result(vimeo_url, VimeoIE.ie_key())
84663361 398
c4737bea
S
399 pladform_url = PladformIE._extract_url(info_page)
400 if pladform_url:
3c989818 401 return self.url_result(pladform_url, PladformIE.ie_key())
c4737bea 402
7a1818c9 403 m_rutube = re.search(
35972ba1 404 r'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page)
7a1818c9 405 if m_rutube is not None:
7a1818c9
PH
406 rutube_url = self._proto_relative_url(
407 m_rutube.group(1).replace('\\', ''))
408 return self.url_result(rutube_url)
409
e3845525
KM
410 dailymotion_urls = DailymotionIE._extract_urls(info_page)
411 if dailymotion_urls:
412 return self.url_result(dailymotion_urls[0], DailymotionIE.ie_key())
413
3c989818
RA
414 odnoklassniki_url = OdnoklassnikiIE._extract_url(info_page)
415 if odnoklassniki_url:
416 return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
417
b73612a2 418 sibnet_urls = self._extract_sibnet_urls(info_page)
419 if sibnet_urls:
420 return self.url_result(sibnet_urls[0])
421
054932f4 422 m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
849086a1 423 if m_opts:
054932f4 424 m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))
849086a1
S
425 if m_opts_url:
426 opts_url = m_opts_url.group(1)
427 if opts_url.startswith('//'):
428 opts_url = 'http:' + opts_url
429 return self.url_result(opts_url)
430
3c989818 431 data = player['params'][0]
475f8a45
S
432 title = unescapeHTML(data['md_title'])
433
424ed37e
S
434 # 2 = live
435 # 3 = post live (finished live)
9cdb0a33 436 is_live = data.get('live') == 2
475f8a45 437
a7ee8a00 438 timestamp = unified_timestamp(self._html_search_regex(
70d7b323 439 r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
ad1bc71a 440 'upload date', default=None)) or int_or_none(data.get('date'))
3aa3953d 441
70d7b323
S
442 view_count = str_to_int(self._search_regex(
443 r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
498a8a4c 444 info_page, 'view count', default=None))
8117df4c 445
bf4b3b6b 446 formats = []
475f8a45 447 for format_id, format_url in data.items():
3052a30d
S
448 format_url = url_or_none(format_url)
449 if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
bf4b3b6b 450 continue
3089bc74
S
451 if (format_id.startswith(('url', 'cache'))
452 or format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
475f8a45
S
453 height = int_or_none(self._search_regex(
454 r'^(?:url|cache)(\d+)', format_id, 'height', default=None))
455 formats.append({
456 'format_id': format_id,
457 'url': format_url,
458 'height': height,
459 })
460 elif format_id == 'hls':
461 formats.extend(self._extract_m3u8_formats(
fb4fc449 462 format_url, video_id, 'mp4', 'm3u8_native',
9cdb0a33 463 m3u8_id=format_id, fatal=False, live=is_live))
475f8a45
S
464 elif format_id == 'rtmp':
465 formats.append({
466 'format_id': format_id,
467 'url': format_url,
468 'ext': 'flv',
469 })
913f3292
PH
470 self._sort_formats(formats)
471
5b6cb562 472 subtitles = {}
473 for sub in data.get('subs') or {}:
474 subtitles.setdefault(sub.get('lang', 'en'), []).append({
475 'ext': sub.get('title', '.srt').split('.')[-1],
476 'url': url_or_none(sub.get('url')),
477 })
478
60d142aa 479 return {
220828f2 480 'id': video_id,
913f3292 481 'formats': formats,
475f8a45 482 'title': title,
913f3292
PH
483 'thumbnail': data.get('jpg'),
484 'uploader': data.get('md_author'),
3c989818
RA
485 'uploader_id': str_or_none(data.get('author_id') or mv_data.get('authorId')),
486 'duration': int_or_none(data.get('duration') or mv_data.get('duration')),
a7ee8a00 487 'timestamp': timestamp,
8117df4c 488 'view_count': view_count,
3c989818
RA
489 'like_count': int_or_none(mv_data.get('likes')),
490 'comment_count': int_or_none(mv_data.get('commcount')),
9cdb0a33 491 'is_live': is_live,
5b6cb562 492 'subtitles': subtitles,
60d142aa 493 }
469d4c89
WS
494
495
2d19fb50 496class VKUserVideosIE(VKBaseIE):
1ecb5d1d
S
497 IE_NAME = 'vk:uservideos'
498 IE_DESC = "VK - User's Videos"
0e6ec3ca 499 _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&](?:.*?\bsection=(?P<section>\w+))?|$)'
469d4c89 500 _TEMPLATE_URL = 'https://vk.com/videos'
dc786d3d 501 _TESTS = [{
0e6ec3ca
RA
502 'url': 'https://vk.com/videos-767561',
503 'info_dict': {
504 'id': '-767561_all',
505 },
506 'playlist_mincount': 1150,
507 }, {
508 'url': 'https://vk.com/videos-767561?section=uploaded',
15ec6693 509 'info_dict': {
0e6ec3ca 510 'id': '-767561_uploaded',
15ec6693 511 },
0e6ec3ca
RA
512 'playlist_mincount': 425,
513 }, {
514 'url': 'http://vk.com/videos205387401',
515 'only_matching': True,
dc786d3d
S
516 }, {
517 'url': 'http://vk.com/videos-77521',
518 'only_matching': True,
0436157b
S
519 }, {
520 'url': 'http://vk.com/videos-97664626?section=all',
521 'only_matching': True,
bdafd88d
S
522 }, {
523 'url': 'http://m.vk.com/videos205387401',
524 'only_matching': True,
525 }, {
526 'url': 'http://new.vk.com/videos205387401',
527 'only_matching': True,
dc786d3d 528 }]
0e6ec3ca
RA
529 _PAGE_SIZE = 1000
530 _VIDEO = collections.namedtuple('Video', ['owner_id', 'id'])
dc786d3d 531
0e6ec3ca 532 def _fetch_page(self, page_id, section, page):
3c989818
RA
533 l = self._download_payload('al_video', page_id, {
534 'act': 'load_videos_silent',
0e6ec3ca 535 'offset': page * self._PAGE_SIZE,
3c989818 536 'oid': page_id,
0e6ec3ca
RA
537 'section': section,
538 })[0][section]['list']
dc786d3d 539
3c989818 540 for video in l:
0e6ec3ca 541 v = self._VIDEO._make(video[:2])
3c989818 542 video_id = '%d_%d' % (v.owner_id, v.id)
0e6ec3ca
RA
543 yield self.url_result(
544 'http://vk.com/video' + video_id, VKIE.ie_key(), video_id)
545
546 def _real_extract(self, url):
5ad28e7f 547 page_id, section = self._match_valid_url(url).groups()
0e6ec3ca
RA
548 if not section:
549 section = 'all'
550
551 entries = OnDemandPagedList(
552 functools.partial(self._fetch_page, page_id, section),
553 self._PAGE_SIZE)
dc786d3d 554
0e6ec3ca 555 return self.playlist_result(entries, '%s_%s' % (page_id, section))
2d19fb50
S
556
557
558class VKWallPostIE(VKBaseIE):
559 IE_NAME = 'vk:wallpost'
560 _VALID_URL = r'https?://(?:(?:(?:(?:m|new)\.)?vk\.com/(?:[^?]+\?.*\bw=)?wall(?P<id>-?\d+_\d+)))'
561 _TESTS = [{
562 # public page URL, audio playlist
563 'url': 'https://vk.com/bs.official?w=wall-23538238_35',
564 'info_dict': {
3c989818
RA
565 'id': '-23538238_35',
566 'title': 'Black Shadow - Wall post -23538238_35',
2d19fb50
S
567 'description': 'md5:3f84b9c4f9ef499731cf1ced9998cc0c',
568 },
569 'playlist': [{
570 'md5': '5ba93864ec5b85f7ce19a9af4af080f6',
571 'info_dict': {
572 'id': '135220665_111806521',
3c989818 573 'ext': 'mp4',
2d19fb50
S
574 'title': 'Black Shadow - Слепое Верование',
575 'duration': 370,
576 'uploader': 'Black Shadow',
577 'artist': 'Black Shadow',
578 'track': 'Слепое Верование',
579 },
580 }, {
581 'md5': '4cc7e804579122b17ea95af7834c9233',
582 'info_dict': {
583 'id': '135220665_111802303',
3c989818 584 'ext': 'mp4',
2d19fb50
S
585 'title': 'Black Shadow - Война - Негасимое Бездны Пламя!',
586 'duration': 423,
587 'uploader': 'Black Shadow',
588 'artist': 'Black Shadow',
589 'track': 'Война - Негасимое Бездны Пламя!',
590 },
2d19fb50 591 }],
51815886 592 'params': {
3c989818 593 'skip_download': True,
51815886
S
594 'usenetrc': True,
595 },
2d19fb50
S
596 'skip': 'Requires vk account credentials',
597 }, {
598 # single YouTube embed, no leading -
599 'url': 'https://vk.com/wall85155021_6319',
600 'info_dict': {
601 'id': '85155021_6319',
3c989818 602 'title': 'Сергей Горбунов - Wall post 85155021_6319',
2d19fb50
S
603 },
604 'playlist_count': 1,
51815886
S
605 'params': {
606 'usenetrc': True,
607 },
2d19fb50
S
608 'skip': 'Requires vk account credentials',
609 }, {
610 # wall page URL
611 'url': 'https://vk.com/wall-23538238_35',
612 'only_matching': True,
613 }, {
614 # mobile wall page URL
615 'url': 'https://m.vk.com/wall-23538238_35',
616 'only_matching': True,
617 }]
3c989818 618 _BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/='
0e6ec3ca 619 _AUDIO = collections.namedtuple('Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads'])
3c989818
RA
620
621 def _decode(self, enc):
622 dec = ''
623 e = n = 0
624 for c in enc:
625 r = self._BASE64_CHARS.index(c)
626 cond = n % 4
627 e = 64 * e + r if cond else r
628 n += 1
629 if cond:
630 dec += chr(255 & e >> (-2 * n & 6))
631 return dec
632
633 def _unmask_url(self, mask_url, vk_id):
634 if 'audio_api_unavailable' in mask_url:
635 extra = mask_url.split('?extra=')[1].split('#')
636 func, base = self._decode(extra[1]).split(chr(11))
3c989818
RA
637 mask_url = list(self._decode(extra[0]))
638 url_len = len(mask_url)
639 indexes = [None] * url_len
640 index = int(base) ^ vk_id
641 for n in range(url_len - 1, -1, -1):
642 index = (url_len * (n + 1) ^ index + n) % url_len
643 indexes[n] = index
644 for n in range(1, url_len):
645 c = mask_url[n]
646 index = indexes[url_len - 1 - n]
647 mask_url[n] = mask_url[index]
648 mask_url[index] = c
649 mask_url = ''.join(mask_url)
650 return mask_url
2d19fb50
S
651
652 def _real_extract(self, url):
653 post_id = self._match_id(url)
654
3c989818
RA
655 webpage = self._download_payload('wkview', post_id, {
656 'act': 'show',
657 'w': 'wall' + post_id,
658 })[1]
2d19fb50
S
659
660 description = clean_html(get_element_by_class('wall_post_text', webpage))
51815886 661 uploader = clean_html(get_element_by_class('author', webpage))
2d19fb50
S
662
663 entries = []
664
3c989818
RA
665 for audio in re.findall(r'data-audio="([^"]+)', webpage):
666 audio = self._parse_json(unescapeHTML(audio), post_id)
0e6ec3ca 667 a = self._AUDIO._make(audio[:16])
3c989818
RA
668 if not a.url:
669 continue
670 title = unescapeHTML(a.title)
7e70620a 671 performer = unescapeHTML(a.performer)
3c989818
RA
672 entries.append({
673 'id': '%s_%s' % (a.owner_id, a.id),
674 'url': self._unmask_url(a.url, a.ads['vk_id']),
7e70620a
RA
675 'title': '%s - %s' % (performer, title) if performer else title,
676 'thumbnails': [{'url': c_url} for c_url in a.cover_url.split(',')] if a.cover_url else None,
677 'duration': int_or_none(a.duration),
3c989818 678 'uploader': uploader,
7e70620a 679 'artist': performer,
3c989818
RA
680 'track': title,
681 'ext': 'mp4',
682 'protocol': 'm3u8',
683 })
2d19fb50
S
684
685 for video in re.finditer(
686 r'<a[^>]+href=(["\'])(?P<url>/video(?:-?[\d_]+).*?)\1', webpage):
687 entries.append(self.url_result(
688 compat_urlparse.urljoin(url, video.group('url')), VKIE.ie_key()))
689
690 title = 'Wall post %s' % post_id
691
692 return self.playlist_result(
693 orderedSet(entries), post_id,
694 '%s - %s' % (uploader, title) if uploader else title,
695 description)