]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/vk.py
[extractor] Add `_perform_login` function (#2943)
[yt-dlp.git] / yt_dlp / extractor / vk.py
CommitLineData
dcdb292f 1# coding: utf-8
94a23d2a
PH
2from __future__ import unicode_literals
3
51815886 4import collections
60d142aa 5import re
60d142aa
JMF
6
7from .common import InfoExtractor
059cd768 8from ..compat import compat_urlparse
60d142aa 9from ..utils import (
2d19fb50 10 clean_html,
9032dc28 11 ExtractorError,
2d19fb50 12 get_element_by_class,
bf4b3b6b 13 int_or_none,
1cc79574 14 orderedSet,
ad1bc71a 15 str_or_none,
8117df4c 16 str_to_int,
60d142aa 17 unescapeHTML,
a7ee8a00 18 unified_timestamp,
3052a30d 19 url_or_none,
6e6bc8da 20 urlencode_postdata,
1cc79574 21)
e3845525 22from .dailymotion import DailymotionIE
3c989818 23from .odnoklassniki import OdnoklassnikiIE
c4737bea 24from .pladform import PladformIE
e3845525 25from .vimeo import VimeoIE
5113b691 26from .youtube import YoutubeIE
60d142aa
JMF
27
28
2d19fb50
S
class VKBaseIE(InfoExtractor):
    """Common base for the VK extractors: login and AJAX payload download."""

    _NETRC_MACHINE = 'vk'

    def _perform_login(self, username, password):
        """Log in to vk.com with the given credentials.

        The front page is fetched first so that its hidden form inputs can be
        sent back together with the credentials in a POST to /login.

        Raises:
            ExtractorError: (expected) when the login response contains the
                ``onLoginFailed`` marker.
        """
        login_page, url_handle = self._download_webpage_handle(
            'https://vk.com', None, 'Downloading login page')

        login_form = self._hidden_inputs(login_page)

        # Credentials are submitted as cp1251-encoded bytes
        login_form.update({
            'email': username.encode('cp1251'),
            'pass': password.encode('cp1251'),
        })

        # vk serves two same remixlhk cookies in Set-Cookie header and expects
        # first one to be actually set
        self._apply_first_set_cookie_header(url_handle, 'remixlhk')

        login_page = self._download_webpage(
            'https://vk.com/login', None,
            note='Logging in',
            data=urlencode_postdata(login_form))

        if re.search(r'onLoginFailed', login_page):
            raise ExtractorError(
                'Unable to login, incorrect username and/or password', expected=True)

    def _download_payload(self, path, video_id, data, fatal=True):
        """POST ``data`` to ``https://vk.com/<path>.php`` and return its payload.

        The JSON response is expected to carry a ``payload`` pair of
        ``(code, payload)``; only the second element is returned.

        Args:
            path: endpoint name without the ``.php`` suffix.
            video_id: identifier passed through to the downloader.
            data: form fields to send; the mapping itself is left untouched.
            fatal: passed through to ``_download_json``.

        Returns:
            The payload part of the response, or None when ``fatal`` is false
            and the download produced no usable response.

        Raises:
            ExtractorError: when the response code is '8' (message taken from
                the payload) or, via ``raise_login_required``, when it is '3'.
        """
        # Work on a copy so the caller's dict does not silently gain an 'al' key
        form = dict(data, al=1)
        response = self._download_json(
            'https://vk.com/%s.php' % path, video_id,
            data=urlencode_postdata(form), fatal=fatal,
            headers={'X-Requested-With': 'XMLHttpRequest'})
        # NOTE(review): presumably _download_json hands back a falsy value on a
        # non-fatal failure - confirm against InfoExtractor. Subscripting that
        # value used to crash here instead of honouring fatal=False.
        if not fatal and not response:
            return None
        code, payload = response['payload']
        if code == '3':
            self.raise_login_required()
        elif code == '8':
            # The first payload item holds the message wrapped in one extra
            # character on each side, which is stripped before cleaning
            raise ExtractorError(clean_html(payload[0][1:-1]), expected=True)
        return payload
67
2d19fb50
S
68
class VKIE(VKBaseIE):
    """Extractor for single VK videos, clips and ``video_ext.php`` embeds."""

    IE_NAME = 'vk'
    IE_DESC = 'VK'
    # Two URL families are recognised:
    #   * embeds (``video_ext.php`` / ``ext.php``) carrying ``oid`` and ``id``
    #     in the query string, captured as ``embed_query``/``oid``/``id``;
    #   * regular video/clip pages, captured as ``videoid`` with an optional
    #     ``list`` id.
    # The dots of ``daxab.com`` are escaped so that only that host matches.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:
                                (?:(?:m|new)\.)?vk\.com/video_|
                                (?:www\.)?daxab\.com/
                            )
                            ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)|
                            (?:
                                (?:(?:m|new)\.)?vk\.com/(?:.+?\?.*?z=)?(?:video|clip)|
                                (?:www\.)?daxab\.com/embed/
                            )
                            (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>([\da-f]+)|(ln-[\da-zA-Z]+)))?
                        )
                    '''
    _TESTS = [
        {
            'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
            'md5': '7babad3b85ea2e91948005b1b8b0cb84',
            'info_dict': {
                'id': '-77521_162222515',
                'ext': 'mp4',
                'title': 'ProtivoGunz - Хуёвая песня',
                'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
                'uploader_id': '-77521',
                'duration': 195,
                'timestamp': 1329049880,
                'upload_date': '20120212',
            },
        },
        {
            'url': 'http://vk.com/video205387401_165548505',
            'info_dict': {
                'id': '205387401_165548505',
                'ext': 'mp4',
                'title': 'No name',
                'uploader': 'Tom Cruise',
                'uploader_id': '205387401',
                'duration': 9,
                'timestamp': 1374364108,
                'upload_date': '20130720',
            }
        },
        {
            'note': 'Embedded video',
            'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa',
            'md5': '7babad3b85ea2e91948005b1b8b0cb84',
            'info_dict': {
                'id': '-77521_162222515',
                'ext': 'mp4',
                'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
                'title': 'ProtivoGunz - Хуёвая песня',
                'duration': 195,
                'upload_date': '20120212',
                'timestamp': 1329049880,
                'uploader_id': '-77521',
            },
        },
        {
            # VIDEO NOW REMOVED
            # please update if you find a video whose URL follows the same pattern
            'url': 'http://vk.com/video-8871596_164049491',
            'md5': 'a590bcaf3d543576c9bd162812387666',
            'note': 'Only available for registered users',
            'info_dict': {
                'id': '-8871596_164049491',
                'ext': 'mp4',
                'uploader': 'Триллеры',
                'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
                'duration': 8352,
                'upload_date': '20121218',
                'view_count': int,
            },
            'skip': 'Removed',
        },
        {
            'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d',
            'info_dict': {
                'id': '-43215063_168067957',
                'ext': 'mp4',
                'uploader': 'Bro Mazter',
                'title': ' ',
                'duration': 7291,
                'upload_date': '20140328',
                'uploader_id': '223413403',
                'timestamp': 1396018030,
            },
            'skip': 'Requires vk account credentials',
        },
        {
            'url': 'http://m.vk.com/video-43215063_169084319?list=125c627d1aa1cebb83&from=wall-43215063_2566540',
            'md5': '0c45586baa71b7cb1d0784ee3f4e00a6',
            'note': 'ivi.ru embed',
            'info_dict': {
                'id': '-43215063_169084319',
                'ext': 'mp4',
                'title': 'Книга Илая',
                'duration': 6771,
                'upload_date': '20140626',
                'view_count': int,
            },
            'skip': 'Removed',
        },
        {
            'url': 'https://vk.com/video-93049196_456239755?list=ln-cBjJ7S4jYYx3ADnmDT',
            'info_dict': {
                'id': '-93049196_456239755',
                'ext': 'mp4',
                'title': '8 серия (озвучка)',
                'duration': 8383,
                'upload_date': '20211222',
                'view_count': int,
            },
        },
        {
            # video (removed?) only available with list id
            'url': 'https://vk.com/video30481095_171201961?list=8764ae2d21f14088d4',
            'md5': '091287af5402239a1051c37ec7b92913',
            'info_dict': {
                'id': '30481095_171201961',
                'ext': 'mp4',
                'title': 'ТюменцевВВ_09.07.2015',
                'uploader': 'Anton Ivanov',
                'duration': 109,
                'upload_date': '20150709',
                'view_count': int,
            },
            'skip': 'Removed',
        },
        {
            # youtube embed
            'url': 'https://vk.com/video276849682_170681728',
            'info_dict': {
                'id': 'V3K4mi0SYkc',
                'ext': 'mp4',
                'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
                'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
                'duration': 178,
                'upload_date': '20130116',
                'uploader': "Children's Joy Foundation Inc.",
                'uploader_id': 'thecjf',
                'view_count': int,
            },
        },
        {
            # dailymotion embed
            'url': 'https://vk.com/video-37468416_456239855',
            'info_dict': {
                'id': 'k3lz2cmXyRuJQSjGHUv',
                'ext': 'mp4',
                'title': 'md5:d52606645c20b0ddbb21655adaa4f56f',
                'description': 'md5:424b8e88cc873217f520e582ba28bb36',
                'uploader': 'AniLibria.Tv',
                'upload_date': '20160914',
                'uploader_id': 'x1p5vl5',
                'timestamp': 1473877246,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # video key is extra_data not url\d+
            'url': 'http://vk.com/video-110305615_171782105',
            'md5': 'e13fcda136f99764872e739d13fac1d1',
            'info_dict': {
                'id': '-110305615_171782105',
                'ext': 'mp4',
                'title': 'S-Dance, репетиции к The way show',
                'uploader': 'THE WAY SHOW | 17 апреля',
                'uploader_id': '-110305615',
                'timestamp': 1454859345,
                'upload_date': '20160207',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # finished live stream, postlive_mp4
            'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
            'info_dict': {
                'id': '-387766_456242764',
                'ext': 'mp4',
                'title': 'ИгроМир 2016 День 1 — Игромания Утром',
                'uploader': 'Игромания',
                'duration': 5239,
                # TODO: use act=show to extract view_count
                # 'view_count': int,
                'upload_date': '20160929',
                'uploader_id': '-387766',
                'timestamp': 1475137527,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # live stream, hls and rtmp links, most likely already finished live
            # stream by the time you are reading this comment
            'url': 'https://vk.com/video-140332_456239111',
            'only_matching': True,
        },
        {
            # removed video, just testing that we match the pattern
            'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
            'only_matching': True,
        },
        {
            # age restricted video, requires vk account credentials
            'url': 'https://vk.com/video205387401_164765225',
            'only_matching': True,
        },
        {
            # pladform embed
            'url': 'https://vk.com/video-76116461_171554880',
            'only_matching': True,
        },
        {
            'url': 'http://new.vk.com/video205387401_165548505',
            'only_matching': True,
        },
        {
            # This video is no longer available, because its author has been blocked.
            'url': 'https://vk.com/video-10639516_456240611',
            'only_matching': True,
        },
        {
            # The video is not available in your region.
            'url': 'https://vk.com/video-51812607_171445436',
            'only_matching': True,
        },
        {
            'url': 'https://vk.com/clip30014565_456240946',
            'only_matching': True,
        }]

    @staticmethod
    def _extract_sibnet_urls(webpage):
        """Return the unescaped ``src`` of every video.sibnet.ru iframe in ``webpage``."""
        # https://help.sibnet.ru/?sibnet_video_embed
        return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
            r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1',
            webpage)]

    def _real_extract(self, url):
        """Extract a VK video, or delegate to the extractor of an embedded one.

        Regular video/clip URLs are resolved through the ``al_video`` AJAX
        endpoint; embed URLs are resolved by downloading ``video_ext.php`` and
        parsing ``playerParams`` out of the page. If the resulting page embeds
        a third-party player, a ``url_result`` for it is returned instead of
        VK formats.

        Raises:
            ExtractorError: (expected) when the embed page shows a known error
                message or a login security check.
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('videoid')

        mv_data = {}
        if video_id:
            request_data = {
                'act': 'show_inline',
                'video': video_id,
            }
            # Some videos (removed?) can only be downloaded with list id specified
            list_id = mobj.group('list_id')
            if list_id:
                request_data['list'] = list_id

            payload = self._download_payload('al_video', video_id, request_data)
            info_page = payload[1]
            opts = payload[-1]
            mv_data = opts.get('mvData') or {}
            player = opts.get('player') or {}
        else:
            # Embed URL: the id is assembled from the oid/id query parameters
            video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))

            info_page = self._download_webpage(
                'http://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id)

            error_message = self._html_search_regex(
                [r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
                 r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'],
                info_page, 'error message', default=None)
            if error_message:
                raise ExtractorError(error_message, expected=True)

            if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
                raise ExtractorError(
                    'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
                    expected=True)

            ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.'

            # Page fragment (regex) -> message template taking the video id
            ERRORS = {
                r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
                ERROR_COPYRIGHT,

                r'>The video .*? was removed from public access by request of the copyright holder.<':
                ERROR_COPYRIGHT,

                r'<!>Please log in or <':
                'Video %s is only available for registered users, '
                'use --username and --password options to provide account credentials.',

                r'<!>Unknown error':
                'Video %s does not exist.',

                r'<!>Видео временно недоступно':
                'Video %s is temporarily unavailable.',

                r'<!>Access denied':
                'Access denied to video %s.',

                r'<!>Видеозапись недоступна, так как её автор был заблокирован.':
                'Video %s is no longer available, because its author has been blocked.',

                r'<!>This video is no longer available, because its author has been blocked.':
                'Video %s is no longer available, because its author has been blocked.',

                r'<!>This video is no longer available, because it has been deleted.':
                'Video %s is no longer available, because it has been deleted.',

                r'<!>The video .+? is not available in your region.':
                'Video %s is not available in your region.',
            }

            for error_re, error_msg in ERRORS.items():
                if re.search(error_re, info_page):
                    raise ExtractorError(error_msg % video_id, expected=True)

            player = self._parse_json(self._search_regex(
                r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n',
                info_page, 'player params'), video_id)

        # Third-party embeds are checked in a fixed order; the first hit wins
        youtube_url = YoutubeIE._extract_url(info_page)
        if youtube_url:
            return self.url_result(youtube_url, YoutubeIE.ie_key())

        vimeo_url = VimeoIE._extract_url(url, info_page)
        if vimeo_url is not None:
            return self.url_result(vimeo_url, VimeoIE.ie_key())

        pladform_url = PladformIE._extract_url(info_page)
        if pladform_url:
            return self.url_result(pladform_url, PladformIE.ie_key())

        m_rutube = re.search(
            r'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page)
        if m_rutube is not None:
            # The matched URL may contain backslash-escaped slashes; drop them
            rutube_url = self._proto_relative_url(
                m_rutube.group(1).replace('\\', ''))
            return self.url_result(rutube_url)

        dailymotion_urls = DailymotionIE._extract_urls(info_page)
        if dailymotion_urls:
            return self.url_result(dailymotion_urls[0], DailymotionIE.ie_key())

        odnoklassniki_url = OdnoklassnikiIE._extract_url(info_page)
        if odnoklassniki_url:
            return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())

        sibnet_urls = self._extract_sibnet_urls(info_page)
        if sibnet_urls:
            return self.url_result(sibnet_urls[0])

        # Generic fallback: a ``url: '...'`` entry inside a ``var opts = {...}``
        # script block (URLs starting with a single slash are not accepted)
        m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
        if m_opts:
            m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))
            if m_opts_url:
                opts_url = m_opts_url.group(1)
                if opts_url.startswith('//'):
                    opts_url = 'http:' + opts_url
                return self.url_result(opts_url)

        data = player['params'][0]
        title = unescapeHTML(data['md_title'])

        # 2 = live
        # 3 = post live (finished live)
        is_live = data.get('live') == 2

        # Prefer the date shown on the page; fall back to the player's 'date'
        timestamp = unified_timestamp(self._html_search_regex(
            r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
            'upload date', default=None)) or int_or_none(data.get('date'))

        view_count = str_to_int(self._search_regex(
            r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
            info_page, 'view count', default=None))

        formats = []
        for format_id, format_url in data.items():
            format_url = url_or_none(format_url)
            if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
                continue
            if (format_id.startswith(('url', 'cache'))
                    or format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
                # Keys such as url720 / cache480 carry the height as a suffix
                height = int_or_none(self._search_regex(
                    r'^(?:url|cache)(\d+)', format_id, 'height', default=None))
                formats.append({
                    'format_id': format_id,
                    'url': format_url,
                    'height': height,
                })
            elif format_id == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id=format_id, fatal=False, live=is_live))
            elif format_id == 'rtmp':
                formats.append({
                    'format_id': format_id,
                    'url': format_url,
                    'ext': 'flv',
                })
        self._sort_formats(formats)

        subtitles = {}
        for sub in data.get('subs') or []:
            sub_url = url_or_none(sub.get('url'))
            if not sub_url:
                # A subtitle entry without a usable URL cannot be downloaded
                continue
            # ``or`` (not a .get default) so explicit None/empty values also
            # fall back instead of crashing on .split or yielding a None key
            subtitles.setdefault(sub.get('lang') or 'en', []).append({
                'ext': (sub.get('title') or '.srt').split('.')[-1],
                'url': sub_url,
            })

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'thumbnail': data.get('jpg'),
            'uploader': data.get('md_author'),
            'uploader_id': str_or_none(data.get('author_id') or mv_data.get('authorId')),
            'duration': int_or_none(data.get('duration') or mv_data.get('duration')),
            'timestamp': timestamp,
            'view_count': view_count,
            'like_count': int_or_none(mv_data.get('likes')),
            'comment_count': int_or_none(mv_data.get('commcount')),
            'is_live': is_live,
            'subtitles': subtitles,
        }
469d4c89
WS
500
501
class VKUserVideosIE(VKBaseIE):
    """Playlist extractor for the videos listed on a ``vk.com/video/@<name>`` page."""

    IE_NAME = 'vk:uservideos'
    IE_DESC = "VK - User's Videos"
    _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/video/@(?P<id>[^?$#/&]+)(?!\?.*\bz=video)(?:[/?#&](?:.*?\bsection=(?P<section>\w+))?|$)'
    _TEMPLATE_URL = 'https://vk.com/videos'
    _TESTS = [{
        'url': 'https://vk.com/video/@mobidevices',
        'info_dict': {
            'id': '-17892518_all',
        },
        'playlist_mincount': 1355,
    }, {
        'url': 'https://vk.com/video/@mobidevices?section=uploaded',
        'info_dict': {
            'id': '-17892518_uploaded',
        },
        'playlist_mincount': 182,
    }]
    # Names for the first two fields of each item in a video list
    _VIDEO = collections.namedtuple('Video', ['owner_id', 'id'])

    def _load_videos(self, page_id, section, offset):
        """Fetch one page of the video list of ``section`` starting at ``offset``.

        Returns the ``section`` entry of the first payload item, from which the
        caller reads ``count``, ``total`` and ``list``.
        """
        return self._download_payload('al_video', page_id, {
            'act': 'load_videos_silent',
            'offset': offset,
            'oid': page_id,
            'section': section,
        })[0][section]

    def _entries(self, page_id, section):
        """Yield a ``url_result`` for every video of ``section``, page by page.

        Paging stops once the accumulated ``count`` reaches the ``total``
        reported by the first page, or as soon as a page reports a zero
        ``count`` (otherwise the same offset would be requested forever).
        """
        offset = 0
        total = None
        while True:
            page = self._load_videos(page_id, section, offset)
            if total is None:
                # Only the first page's total is used as the stop condition
                total = page['total']

            for video in page['list']:
                v = self._VIDEO._make(video[:2])
                video_id = '%d_%d' % (v.owner_id, v.id)
                yield self.url_result(
                    'http://vk.com/video' + video_id, VKIE.ie_key(), video_id)

            page_count = page['count']
            offset += page_count
            if offset >= total or not page_count:
                break

    def _real_extract(self, url):
        """Resolve the owner id from the page and return its videos as a playlist.

        The playlist id is ``<owner id>_<section>``; the section defaults to
        ``all`` when the URL does not name one.
        """
        u_id, section = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, u_id)
        page_id = self._search_regex(r'data-owner-id\s?=\s?"([^"]+)"', webpage, 'page_id')
        if not section:
            section = 'all'

        return self.playlist_result(self._entries(page_id, section), '%s_%s' % (page_id, section))
2d19fb50
S
558
559
class VKWallPostIE(VKBaseIE):
    """Playlist extractor for the audio tracks and videos attached to a wall post."""

    IE_NAME = 'vk:wallpost'
    _VALID_URL = r'https?://(?:(?:(?:(?:m|new)\.)?vk\.com/(?:[^?]+\?.*\bw=)?wall(?P<id>-?\d+_\d+)))'
    _TESTS = [{
        # public page URL, audio playlist
        'url': 'https://vk.com/bs.official?w=wall-23538238_35',
        'info_dict': {
            'id': '-23538238_35',
            'title': 'Black Shadow - Wall post -23538238_35',
            'description': 'md5:3f84b9c4f9ef499731cf1ced9998cc0c',
        },
        'playlist': [{
            'md5': '5ba93864ec5b85f7ce19a9af4af080f6',
            'info_dict': {
                'id': '135220665_111806521',
                'ext': 'mp4',
                'title': 'Black Shadow - Слепое Верование',
                'duration': 370,
                'uploader': 'Black Shadow',
                'artist': 'Black Shadow',
                'track': 'Слепое Верование',
            },
        }, {
            'md5': '4cc7e804579122b17ea95af7834c9233',
            'info_dict': {
                'id': '135220665_111802303',
                'ext': 'mp4',
                'title': 'Black Shadow - Война - Негасимое Бездны Пламя!',
                'duration': 423,
                'uploader': 'Black Shadow',
                'artist': 'Black Shadow',
                'track': 'Война - Негасимое Бездны Пламя!',
            },
        }],
        'params': {
            'skip_download': True,
            'usenetrc': True,
        },
        'skip': 'Requires vk account credentials',
    }, {
        # single YouTube embed, no leading -
        'url': 'https://vk.com/wall85155021_6319',
        'info_dict': {
            'id': '85155021_6319',
            'title': 'Сергей Горбунов - Wall post 85155021_6319',
        },
        'playlist_count': 1,
        'params': {
            'usenetrc': True,
        },
        'skip': 'Requires vk account credentials',
    }, {
        # wall page URL
        'url': 'https://vk.com/wall-23538238_35',
        'only_matching': True,
    }, {
        # mobile wall page URL
        'url': 'https://m.vk.com/wall-23538238_35',
        'only_matching': True,
    }]
    # Lookup alphabet for _decode. Note that the digit 0 and the capital
    # letter O sit in each other's usual place; the string is kept as is.
    _BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/='
    # Field names for the first 16 items of a ``data-audio`` JSON array
    _AUDIO = collections.namedtuple('Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads'])

    def _decode(self, enc):
        """Decode ``enc`` using the ``_BASE64_CHARS`` alphabet.

        Characters are consumed in groups of four 6-bit values; every
        character except the first of a group emits one output character.

        Raises:
            ValueError: if ``enc`` contains a character outside the alphabet.
        """
        decoded = []
        e = n = 0
        for c in enc:
            r = self._BASE64_CHARS.index(c)
            cond = n % 4
            # Start a fresh accumulator at the beginning of each group
            e = 64 * e + r if cond else r
            n += 1
            if cond:
                decoded.append(chr(255 & e >> (-2 * n & 6)))
        # Joined once at the end instead of growing a string per character
        return ''.join(decoded)

    def _unmask_url(self, mask_url, vk_id):
        """Return the real audio URL hidden behind an ``audio_api_unavailable`` URL.

        URLs that do not contain ``audio_api_unavailable`` are returned
        unchanged. Otherwise the ``extra`` query value is split at ``#``: the
        first part decodes to the shuffled URL, the second to a pair separated
        by ``chr(11)`` whose second element, XORed with ``vk_id``, seeds the
        index sequence used to undo the shuffle.
        """
        if 'audio_api_unavailable' in mask_url:
            extra = mask_url.split('?extra=')[1].split('#')
            # The first element of the pair is not used by this implementation
            _, base = self._decode(extra[1]).split(chr(11))
            mask_url = list(self._decode(extra[0]))
            url_len = len(mask_url)
            indexes = [None] * url_len
            index = int(base) ^ vk_id
            for n in range(url_len - 1, -1, -1):
                index = (url_len * (n + 1) ^ index + n) % url_len
                indexes[n] = index
            # Apply the swaps described by the index sequence
            for n in range(1, url_len):
                c = mask_url[n]
                index = indexes[url_len - 1 - n]
                mask_url[n] = mask_url[index]
                mask_url[index] = c
            mask_url = ''.join(mask_url)
        return mask_url

    def _real_extract(self, url):
        """Build a playlist from the audio and video attachments of a wall post.

        Audio entries are read from ``data-audio`` attributes (entries without
        a URL are skipped); videos are collected from ``/video...`` links and
        delegated to ``VKIE``.
        """
        post_id = self._match_id(url)

        webpage = self._download_payload('wkview', post_id, {
            'act': 'show',
            'w': 'wall' + post_id,
        })[1]

        description = clean_html(get_element_by_class('wall_post_text', webpage))
        uploader = clean_html(get_element_by_class('author', webpage))

        entries = []

        for audio in re.findall(r'data-audio="([^"]+)', webpage):
            audio = self._parse_json(unescapeHTML(audio), post_id)
            a = self._AUDIO._make(audio[:16])
            if not a.url:
                continue
            title = unescapeHTML(a.title)
            performer = unescapeHTML(a.performer)
            entries.append({
                'id': '%s_%s' % (a.owner_id, a.id),
                'url': self._unmask_url(a.url, a.ads['vk_id']),
                'title': '%s - %s' % (performer, title) if performer else title,
                'thumbnails': [{'url': c_url} for c_url in a.cover_url.split(',')] if a.cover_url else None,
                'duration': int_or_none(a.duration),
                'uploader': uploader,
                'artist': performer,
                'track': title,
                'ext': 'mp4',
                'protocol': 'm3u8_native',
            })

        for video in re.finditer(
                r'<a[^>]+href=(["\'])(?P<url>/video(?:-?[\d_]+).*?)\1', webpage):
            entries.append(self.url_result(
                compat_urlparse.urljoin(url, video.group('url')), VKIE.ie_key()))

        title = 'Wall post %s' % post_id

        return self.playlist_result(
            orderedSet(entries), post_id,
            '%s - %s' % (uploader, title) if uploader else title,
            description)