]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/vk.py
[youtube] Force `hl=en` for comments (#594)
[yt-dlp.git] / yt_dlp / extractor / vk.py
CommitLineData
dcdb292f 1# coding: utf-8
94a23d2a
PH
2from __future__ import unicode_literals
3
51815886 4import collections
0e6ec3ca 5import functools
60d142aa 6import re
60d142aa
JMF
7
8from .common import InfoExtractor
059cd768 9from ..compat import compat_urlparse
60d142aa 10from ..utils import (
2d19fb50 11 clean_html,
9032dc28 12 ExtractorError,
2d19fb50 13 get_element_by_class,
bf4b3b6b 14 int_or_none,
0e6ec3ca 15 OnDemandPagedList,
1cc79574 16 orderedSet,
ad1bc71a 17 str_or_none,
8117df4c 18 str_to_int,
60d142aa 19 unescapeHTML,
a7ee8a00 20 unified_timestamp,
3052a30d 21 url_or_none,
6e6bc8da 22 urlencode_postdata,
1cc79574 23)
e3845525 24from .dailymotion import DailymotionIE
3c989818 25from .odnoklassniki import OdnoklassnikiIE
c4737bea 26from .pladform import PladformIE
e3845525 27from .vimeo import VimeoIE
5113b691 28from .youtube import YoutubeIE
60d142aa
JMF
29
30
2d19fb50
S
class VKBaseIE(InfoExtractor):
    """Common base for the VK extractors: optional login and AJAX payload download."""

    _NETRC_MACHINE = 'vk'

    def _login(self):
        """Log in to vk.com when credentials are configured; do nothing otherwise.

        Raises:
            ExtractorError: (expected) if the login response contains the
                ``onLoginFailed`` marker.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        login_page, url_handle = self._download_webpage_handle(
            'https://vk.com', None, 'Downloading login page')

        # Start from the hidden inputs of the landing page and add credentials
        login_form = self._hidden_inputs(login_page)
        login_form.update({
            'email': username.encode('cp1251'),
            'pass': password.encode('cp1251'),
        })

        # vk serves two same remixlhk cookies in Set-Cookie header and expects
        # first one to be actually set
        self._apply_first_set_cookie_header(url_handle, 'remixlhk')

        login_page = self._download_webpage(
            'https://login.vk.com/?act=login', None,
            note='Logging in',
            data=urlencode_postdata(login_form))

        if 'onLoginFailed' in login_page:
            raise ExtractorError(
                'Unable to login, incorrect username and/or password', expected=True)

    def _real_initialize(self):
        """Perform the (optional) login before any extraction."""
        self._login()

    def _download_payload(self, path, video_id, data, fatal=True):
        """POST ``data`` to ``https://vk.com/<path>.php`` and return its payload.

        The form is sent with an extra ``al=1`` field and an
        ``X-Requested-With: XMLHttpRequest`` header. The JSON response is
        expected to hold a ``payload`` pair of ``(code, payload)``.

        Args:
            path: script name without the ``.php`` suffix.
            video_id: identifier passed through to the download helper.
            data: dict of form fields; it is NOT modified.
            fatal: forwarded to the download helper.

        Returns:
            The second element of the ``payload`` pair, or None when the
            download produced nothing and ``fatal`` is false.

        Raises:
            ExtractorError: (expected) when the response code is ``'8'``; the
                message is taken from the payload. Code ``'3'`` triggers
                ``raise_login_required()``.
        """
        # Work on a copy so that the caller's dict is left untouched
        form = dict(data)
        form['al'] = 1
        response = self._download_json(
            'https://vk.com/%s.php' % path, video_id,
            data=urlencode_postdata(form), fatal=fatal,
            headers={'X-Requested-With': 'XMLHttpRequest'})
        if not fatal and not response:
            # Non-fatal download failure: there is nothing to unpack
            return None
        code, payload = response['payload']
        if code == '3':
            self.raise_login_required()
        elif code == '8':
            raise ExtractorError(clean_html(payload[0][1:-1]), expected=True)
        return payload
76
2d19fb50
S
77
class VKIE(VKBaseIE):
    """Extractor for single VK videos (regular pages and ``video_ext.php`` embeds)."""

    IE_NAME = 'vk'
    IE_DESC = 'VK'
    # Two URL families are recognised:
    #   * embed URLs (``video_ext.php?...oid=..&id=..``) -> groups oid/id/embed_query
    #   * regular URLs (``video<owner>_<id>``)            -> groups videoid/list_id
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:
                                (?:(?:m|new)\.)?vk\.com/video_|
                                (?:www\.)?daxab\.com/
                            )
                            ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)|
                            (?:
                                (?:(?:m|new)\.)?vk\.com/(?:.+?\?.*?z=)?video|
                                (?:www\.)?daxab\.com/embed/
                            )
                            (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>[\da-f]+))?
                        )
                    '''
    _TESTS = [
        {
            'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
            'md5': '7babad3b85ea2e91948005b1b8b0cb84',
            'info_dict': {
                'id': '-77521_162222515',
                'ext': 'mp4',
                'title': 'ProtivoGunz - Хуёвая песня',
                'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
                'uploader_id': '-77521',
                'duration': 195,
                'timestamp': 1329049880,
                'upload_date': '20120212',
            },
        },
        {
            'url': 'http://vk.com/video205387401_165548505',
            'info_dict': {
                'id': '205387401_165548505',
                'ext': 'mp4',
                'title': 'No name',
                'uploader': 'Tom Cruise',
                'uploader_id': '205387401',
                'duration': 9,
                'timestamp': 1374364108,
                'upload_date': '20130720',
            }
        },
        {
            'note': 'Embedded video',
            'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa',
            'md5': '7babad3b85ea2e91948005b1b8b0cb84',
            'info_dict': {
                'id': '-77521_162222515',
                'ext': 'mp4',
                'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
                'title': 'ProtivoGunz - Хуёвая песня',
                'duration': 195,
                'upload_date': '20120212',
                'timestamp': 1329049880,
                'uploader_id': '-77521',
            },
        },
        {
            # VIDEO NOW REMOVED
            # please update if you find a video whose URL follows the same pattern
            'url': 'http://vk.com/video-8871596_164049491',
            'md5': 'a590bcaf3d543576c9bd162812387666',
            'note': 'Only available for registered users',
            'info_dict': {
                'id': '-8871596_164049491',
                'ext': 'mp4',
                'uploader': 'Триллеры',
                'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
                'duration': 8352,
                'upload_date': '20121218',
                'view_count': int,
            },
            'skip': 'Removed',
        },
        {
            'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d',
            'info_dict': {
                'id': '-43215063_168067957',
                'ext': 'mp4',
                'uploader': 'Bro Mazter',
                'title': ' ',
                'duration': 7291,
                'upload_date': '20140328',
                'uploader_id': '223413403',
                'timestamp': 1396018030,
            },
            'skip': 'Requires vk account credentials',
        },
        {
            'url': 'http://m.vk.com/video-43215063_169084319?list=125c627d1aa1cebb83&from=wall-43215063_2566540',
            'md5': '0c45586baa71b7cb1d0784ee3f4e00a6',
            'note': 'ivi.ru embed',
            'info_dict': {
                'id': '-43215063_169084319',
                'ext': 'mp4',
                'title': 'Книга Илая',
                'duration': 6771,
                'upload_date': '20140626',
                'view_count': int,
            },
            'skip': 'Removed',
        },
        {
            # video (removed?) only available with list id
            'url': 'https://vk.com/video30481095_171201961?list=8764ae2d21f14088d4',
            'md5': '091287af5402239a1051c37ec7b92913',
            'info_dict': {
                'id': '30481095_171201961',
                'ext': 'mp4',
                'title': 'ТюменцевВВ_09.07.2015',
                'uploader': 'Anton Ivanov',
                'duration': 109,
                'upload_date': '20150709',
                'view_count': int,
            },
            'skip': 'Removed',
        },
        {
            # youtube embed
            'url': 'https://vk.com/video276849682_170681728',
            'info_dict': {
                'id': 'V3K4mi0SYkc',
                'ext': 'mp4',
                'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
                'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
                'duration': 178,
                'upload_date': '20130116',
                'uploader': "Children's Joy Foundation Inc.",
                'uploader_id': 'thecjf',
                'view_count': int,
            },
        },
        {
            # dailymotion embed
            'url': 'https://vk.com/video-37468416_456239855',
            'info_dict': {
                'id': 'k3lz2cmXyRuJQSjGHUv',
                'ext': 'mp4',
                'title': 'md5:d52606645c20b0ddbb21655adaa4f56f',
                'description': 'md5:424b8e88cc873217f520e582ba28bb36',
                'uploader': 'AniLibria.Tv',
                'upload_date': '20160914',
                'uploader_id': 'x1p5vl5',
                'timestamp': 1473877246,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # video key is extra_data not url\d+
            'url': 'http://vk.com/video-110305615_171782105',
            'md5': 'e13fcda136f99764872e739d13fac1d1',
            'info_dict': {
                'id': '-110305615_171782105',
                'ext': 'mp4',
                'title': 'S-Dance, репетиции к The way show',
                'uploader': 'THE WAY SHOW | 17 апреля',
                'uploader_id': '-110305615',
                'timestamp': 1454859345,
                'upload_date': '20160207',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # finished live stream, postlive_mp4
            'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
            'info_dict': {
                'id': '-387766_456242764',
                'ext': 'mp4',
                'title': 'ИгроМир 2016 День 1 — Игромания Утром',
                'uploader': 'Игромания',
                'duration': 5239,
                # TODO: use act=show to extract view_count
                # 'view_count': int,
                'upload_date': '20160929',
                'uploader_id': '-387766',
                'timestamp': 1475137527,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # live stream, hls and rtmp links, most likely already finished live
            # stream by the time you are reading this comment
            'url': 'https://vk.com/video-140332_456239111',
            'only_matching': True,
        },
        {
            # removed video, just testing that we match the pattern
            'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
            'only_matching': True,
        },
        {
            # age restricted video, requires vk account credentials
            'url': 'https://vk.com/video205387401_164765225',
            'only_matching': True,
        },
        {
            # pladform embed
            'url': 'https://vk.com/video-76116461_171554880',
            'only_matching': True,
        },
        {
            'url': 'http://new.vk.com/video205387401_165548505',
            'only_matching': True,
        },
        {
            # This video is no longer available, because its author has been blocked.
            'url': 'https://vk.com/video-10639516_456240611',
            'only_matching': True,
        },
        {
            # The video is not available in your region.
            'url': 'https://vk.com/video-51812607_171445436',
            'only_matching': True,
        }]

    _ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.'

    # (regex searched in the embed page, message template taking the video id).
    # Checked in order; the first regex that matches raises.
    _EMBED_ERRORS = (
        (r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<',
         _ERROR_COPYRIGHT),

        (r'>The video .*? was removed from public access by request of the copyright holder.<',
         _ERROR_COPYRIGHT),

        (r'<!>Please log in or <',
         'Video %s is only available for registered users, '
         'use --username and --password options to provide account credentials.'),

        (r'<!>Unknown error',
         'Video %s does not exist.'),

        (r'<!>Видео временно недоступно',
         'Video %s is temporarily unavailable.'),

        (r'<!>Access denied',
         'Access denied to video %s.'),

        (r'<!>Видеозапись недоступна, так как её автор был заблокирован.',
         'Video %s is no longer available, because its author has been blocked.'),

        (r'<!>This video is no longer available, because its author has been blocked.',
         'Video %s is no longer available, because its author has been blocked.'),

        (r'<!>This video is no longer available, because it has been deleted.',
         'Video %s is no longer available, because it has been deleted.'),

        (r'<!>The video .+? is not available in your region.',
         'Video %s is not available in your region.'),
    )

    @staticmethod
    def _extract_sibnet_urls(webpage):
        """Return the (HTML-unescaped) sibnet iframe URLs found in ``webpage``."""
        # https://help.sibnet.ru/?sibnet_video_embed
        return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
            r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1',
            webpage)]

    def _raise_for_embed_errors(self, info_page, video_id):
        """Raise an expected ExtractorError if the embed page reports a problem.

        Checks, in order: an inline error message block, the "unusual
        location" security check, and the ``_EMBED_ERRORS`` table.
        """
        error_message = self._html_search_regex(
            [r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
             r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'],
            info_page, 'error message', default=None)
        if error_message:
            raise ExtractorError(error_message, expected=True)

        if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
            raise ExtractorError(
                'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
                expected=True)

        for error_re, error_msg in self._EMBED_ERRORS:
            if re.search(error_re, info_page):
                raise ExtractorError(error_msg % video_id, expected=True)

    def _extract_third_party_embed(self, url, info_page):
        """Return a ``url_result`` for a third-party player embedded in the page.

        Hosts are probed in a fixed order (YouTube, Vimeo, Pladform, Rutube,
        Dailymotion, Odnoklassniki, Sibnet, then a generic ``var opts`` URL).
        Returns None when no embed is found.
        """
        youtube_url = YoutubeIE._extract_url(info_page)
        if youtube_url:
            return self.url_result(youtube_url, YoutubeIE.ie_key())

        vimeo_url = VimeoIE._extract_url(url, info_page)
        if vimeo_url is not None:
            return self.url_result(vimeo_url, VimeoIE.ie_key())

        pladform_url = PladformIE._extract_url(info_page)
        if pladform_url:
            return self.url_result(pladform_url, PladformIE.ie_key())

        m_rutube = re.search(
            r'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page)
        if m_rutube is not None:
            # The URL may be backslash-escaped inside the page; strip the escapes
            rutube_url = self._proto_relative_url(
                m_rutube.group(1).replace('\\', ''))
            return self.url_result(rutube_url)

        dailymotion_urls = DailymotionIE._extract_urls(info_page)
        if dailymotion_urls:
            return self.url_result(dailymotion_urls[0], DailymotionIE.ie_key())

        odnoklassniki_url = OdnoklassnikiIE._extract_url(info_page)
        if odnoklassniki_url:
            return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())

        sibnet_urls = self._extract_sibnet_urls(info_page)
        if sibnet_urls:
            return self.url_result(sibnet_urls[0])

        m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
        if m_opts:
            m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))
            if m_opts_url:
                opts_url = m_opts_url.group(1)
                if opts_url.startswith('//'):
                    opts_url = 'http:' + opts_url
                return self.url_result(opts_url)

        return None

    def _extract_formats(self, data, video_id, is_live):
        """Build and sort the formats list from the player params dict ``data``."""
        formats = []
        for format_id, format_url in data.items():
            format_url = url_or_none(format_url)
            if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
                continue
            if (format_id.startswith(('url', 'cache'))
                    or format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
                # Keys of the form url<N> / cache<N> carry the height as <N>
                height = int_or_none(self._search_regex(
                    r'^(?:url|cache)(\d+)', format_id, 'height', default=None))
                formats.append({
                    'format_id': format_id,
                    'url': format_url,
                    'height': height,
                })
            elif format_id == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id=format_id, fatal=False, live=is_live))
            elif format_id == 'rtmp':
                formats.append({
                    'format_id': format_id,
                    'url': format_url,
                    'ext': 'flv',
                })
        self._sort_formats(formats)
        return formats

    def _real_extract(self, url):
        """Extract a VK video, or delegate to the extractor of an embedded player.

        Raises:
            ExtractorError: for error pages reported by VK (expected), or when
                the player parameters cannot be located.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')

        mv_data = {}
        if video_id:
            # Regular video URL: query the inline-player AJAX endpoint
            query = {
                'act': 'show_inline',
                'video': video_id,
            }
            # Some videos (removed?) can only be downloaded with list id specified
            list_id = mobj.group('list_id')
            if list_id:
                query['list'] = list_id

            payload = self._download_payload('al_video', video_id, query)
            info_page = payload[1]
            opts = payload[-1]
            mv_data = opts.get('mvData') or {}
            player = opts.get('player') or {}
        else:
            # Embed URL: the id is assembled from the oid/id query parameters
            video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))

            info_page = self._download_webpage(
                'http://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id)

            self._raise_for_embed_errors(info_page, video_id)

            player = self._parse_json(self._search_regex(
                r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n',
                info_page, 'player params'), video_id)

        embed_result = self._extract_third_party_embed(url, info_page)
        if embed_result:
            return embed_result

        try:
            data = player['params'][0]
        except (KeyError, IndexError, TypeError):
            # Report a proper extractor failure instead of a bare lookup error
            raise ExtractorError(
                'Unable to extract player params for video %s' % video_id)
        title = unescapeHTML(data['md_title'])

        # 2 = live
        # 3 = post live (finished live)
        is_live = data.get('live') == 2
        if is_live:
            title = self._live_title(title)

        # Prefer the date shown on the page, fall back to the player's 'date'
        timestamp = unified_timestamp(self._html_search_regex(
            r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
            'upload date', default=None)) or int_or_none(data.get('date'))

        view_count = str_to_int(self._search_regex(
            r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
            info_page, 'view count', default=None))

        formats = self._extract_formats(data, video_id, is_live)

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'thumbnail': data.get('jpg'),
            'uploader': data.get('md_author'),
            'uploader_id': str_or_none(data.get('author_id') or mv_data.get('authorId')),
            'duration': int_or_none(data.get('duration') or mv_data.get('duration')),
            'timestamp': timestamp,
            'view_count': view_count,
            'like_count': int_or_none(mv_data.get('likes')),
            'comment_count': int_or_none(mv_data.get('commcount')),
            'is_live': is_live,
        }
469d4c89
WS
488
489
class VKUserVideosIE(VKBaseIE):
    """Playlist extractor for the video list of a VK user or community."""

    IE_NAME = 'vk:uservideos'
    IE_DESC = "VK - User's Videos"
    _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&](?:.*?\bsection=(?P<section>\w+))?|$)'
    _TEMPLATE_URL = 'https://vk.com/videos'
    _TESTS = [{
        'url': 'https://vk.com/videos-767561',
        'info_dict': {
            'id': '-767561_all',
        },
        'playlist_mincount': 1150,
    }, {
        'url': 'https://vk.com/videos-767561?section=uploaded',
        'info_dict': {
            'id': '-767561_uploaded',
        },
        'playlist_mincount': 425,
    }, {
        'url': 'http://vk.com/videos205387401',
        'only_matching': True,
    }, {
        'url': 'http://vk.com/videos-77521',
        'only_matching': True,
    }, {
        'url': 'http://vk.com/videos-97664626?section=all',
        'only_matching': True,
    }, {
        'url': 'http://m.vk.com/videos205387401',
        'only_matching': True,
    }, {
        'url': 'http://new.vk.com/videos205387401',
        'only_matching': True,
    }]
    # Number of entries requested per page; also used to compute the offset
    _PAGE_SIZE = 1000
    # Names for the first two fields of each item in the returned video list
    _VIDEO = collections.namedtuple('Video', ['owner_id', 'id'])

    def _fetch_page(self, page_id, section, page):
        """Yield a ``url_result`` for every video on page number ``page``.

        ``page`` is zero-based; the request offset is ``page * _PAGE_SIZE``.
        """
        payload = self._download_payload('al_video', page_id, {
            'act': 'load_videos_silent',
            'offset': page * self._PAGE_SIZE,
            'oid': page_id,
            'section': section,
        })
        videos = payload[0][section]['list']

        for raw_video in videos:
            ref = self._VIDEO._make(raw_video[:2])
            video_id = '%d_%d' % (ref.owner_id, ref.id)
            video_url = 'http://vk.com/video' + video_id
            yield self.url_result(video_url, VKIE.ie_key(), video_id)

    def _real_extract(self, url):
        """Return a lazily paged playlist with id ``<page id>_<section>``."""
        page_id, section = re.match(self._VALID_URL, url).groups()
        # URLs without an explicit section list everything
        section = section or 'all'

        fetch_page = functools.partial(self._fetch_page, page_id, section)
        entries = OnDemandPagedList(fetch_page, self._PAGE_SIZE)

        playlist_id = '%s_%s' % (page_id, section)
        return self.playlist_result(entries, playlist_id)
2d19fb50
S
550
551
class VKWallPostIE(VKBaseIE):
    """Playlist extractor for the audio tracks and videos attached to a wall post."""

    IE_NAME = 'vk:wallpost'
    _VALID_URL = r'https?://(?:(?:(?:(?:m|new)\.)?vk\.com/(?:[^?]+\?.*\bw=)?wall(?P<id>-?\d+_\d+)))'
    _TESTS = [{
        # public page URL, audio playlist
        'url': 'https://vk.com/bs.official?w=wall-23538238_35',
        'info_dict': {
            'id': '-23538238_35',
            'title': 'Black Shadow - Wall post -23538238_35',
            'description': 'md5:3f84b9c4f9ef499731cf1ced9998cc0c',
        },
        'playlist': [{
            'md5': '5ba93864ec5b85f7ce19a9af4af080f6',
            'info_dict': {
                'id': '135220665_111806521',
                'ext': 'mp4',
                'title': 'Black Shadow - Слепое Верование',
                'duration': 370,
                'uploader': 'Black Shadow',
                'artist': 'Black Shadow',
                'track': 'Слепое Верование',
            },
        }, {
            'md5': '4cc7e804579122b17ea95af7834c9233',
            'info_dict': {
                'id': '135220665_111802303',
                'ext': 'mp4',
                'title': 'Black Shadow - Война - Негасимое Бездны Пламя!',
                'duration': 423,
                'uploader': 'Black Shadow',
                'artist': 'Black Shadow',
                'track': 'Война - Негасимое Бездны Пламя!',
            },
        }],
        'params': {
            'skip_download': True,
            'usenetrc': True,
        },
        'skip': 'Requires vk account credentials',
    }, {
        # single YouTube embed, no leading -
        'url': 'https://vk.com/wall85155021_6319',
        'info_dict': {
            'id': '85155021_6319',
            'title': 'Сергей Горбунов - Wall post 85155021_6319',
        },
        'playlist_count': 1,
        'params': {
            'usenetrc': True,
        },
        'skip': 'Requires vk account credentials',
    }, {
        # wall page URL
        'url': 'https://vk.com/wall-23538238_35',
        'only_matching': True,
    }, {
        # mobile wall page URL
        'url': 'https://m.vk.com/wall-23538238_35',
        'only_matching': True,
    }]
    # Alphabet used by _decode. NOTE(review): '0' and 'O' sit in each other's
    # positions here; this looks deliberate - do not "normalize" the string.
    _BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/='
    # Field names for the first 16 items of a ``data-audio`` JSON array
    _AUDIO = collections.namedtuple('Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads'])

    def _decode(self, enc):
        """Decode ``enc`` using the ``_BASE64_CHARS`` alphabet.

        Characters are consumed in groups of four 6-bit values; every
        character except the first of a group emits one output character.
        A character outside the alphabet raises ValueError.
        """
        decoded = []
        acc = 0
        for pos, symbol in enumerate(enc):
            value = self._BASE64_CHARS.index(symbol)
            if pos % 4:
                acc = 64 * acc + value
                # Shift amount depends on how many characters were consumed so far
                shift = (-2 * (pos + 1)) & 6
                decoded.append(chr(255 & (acc >> shift)))
            else:
                # First character of a group restarts the accumulator
                acc = value
        return ''.join(decoded)

    def _unmask_url(self, mask_url, vk_id):
        """Return the real audio URL hidden in ``mask_url``.

        URLs that do not contain ``audio_api_unavailable`` are returned as
        is. Otherwise the ``extra`` query value is split on ``#``, both parts
        are decoded, and the characters of the first part are reordered using
        a sequence seeded by the second part and ``vk_id``.
        """
        if 'audio_api_unavailable' not in mask_url:
            return mask_url

        extra = mask_url.split('?extra=')[1].split('#')
        # The decoded second part holds "<operation>\x0b<seed>"; only the seed is used
        _operation, seed = self._decode(extra[1]).split(chr(11))
        symbols = list(self._decode(extra[0]))
        size = len(symbols)

        # Build the swap positions, walking from the last index down to 0
        positions = [None] * size
        cursor = int(seed) ^ vk_id
        for n in reversed(range(size)):
            cursor = ((size * (n + 1)) ^ (cursor + n)) % size
            positions[n] = cursor

        # Apply the swaps; index 0 is never used as the left-hand operand
        for n in range(1, size):
            target = positions[size - 1 - n]
            symbols[n], symbols[target] = symbols[target], symbols[n]

        return ''.join(symbols)

    def _real_extract(self, url):
        """Return a playlist of the audio tracks and video links in the post."""
        post_id = self._match_id(url)

        payload = self._download_payload('wkview', post_id, {
            'act': 'show',
            'w': 'wall' + post_id,
        })
        webpage = payload[1]

        description = clean_html(get_element_by_class('wall_post_text', webpage))
        uploader = clean_html(get_element_by_class('author', webpage))

        entries = []

        # Audio attachments: each data-audio attribute holds a JSON array
        for raw_audio in re.findall(r'data-audio="([^"]+)', webpage):
            fields = self._parse_json(unescapeHTML(raw_audio), post_id)
            track = self._AUDIO._make(fields[:16])
            if not track.url:
                continue
            track_title = unescapeHTML(track.title)
            artist = unescapeHTML(track.performer)
            entry_id = '%s_%s' % (track.owner_id, track.id)
            media_url = self._unmask_url(track.url, track.ads['vk_id'])
            if artist:
                full_title = '%s - %s' % (artist, track_title)
            else:
                full_title = track_title
            if track.cover_url:
                thumbnails = [{'url': cover} for cover in track.cover_url.split(',')]
            else:
                thumbnails = None
            entries.append({
                'id': entry_id,
                'url': media_url,
                'title': full_title,
                'thumbnails': thumbnails,
                'duration': int_or_none(track.duration),
                'uploader': uploader,
                'artist': artist,
                'track': track_title,
                'ext': 'mp4',
                'protocol': 'm3u8',
            })

        # Video attachments: delegate every /video... link to VKIE
        video_links = re.finditer(
            r'<a[^>]+href=(["\'])(?P<url>/video(?:-?[\d_]+).*?)\1', webpage)
        for link in video_links:
            video_url = compat_urlparse.urljoin(url, link.group('url'))
            entries.append(self.url_result(video_url, VKIE.ie_key()))

        playlist_title = 'Wall post %s' % post_id
        if uploader:
            playlist_title = '%s - %s' % (uploader, playlist_title)

        return self.playlist_result(
            orderedSet(entries), post_id, playlist_title, description)