]>
Commit | Line | Data |
---|---|---|
60d142aa | 1 | # encoding: utf-8 |
94a23d2a PH |
2 | from __future__ import unicode_literals |
3 | ||
60d142aa JMF |
4 | import re |
5 | import json | |
6 | ||
7 | from .common import InfoExtractor | |
8 | from ..utils import ( | |
9 | compat_str, | |
10 | unescapeHTML, | |
11 | ) | |
12 | ||
13 | ||
class VKIE(InfoExtractor):
    """Information extractor for videos served from vk.com.

    Handles both direct ``/video<id>`` links and ``/videos...?z=video<id>``
    overlay links (see ``_VALID_URL``).  Videos that are merely YouTube
    embeds are handed off to the ``Youtube`` extractor.
    """

    IE_NAME = 'vk.com'
    _VALID_URL = r'https?://vk\.com/(?:videos.*?\?.*?z=)?video(?P<id>.*?)(?:\?|%2F|$)'

    _TESTS = [{
        'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
        'file': '162222515.flv',
        'md5': '0deae91935c54e00003c2a00646315f0',
        'info_dict': {
            'title': 'ProtivoGunz - Хуёвая песня',
            'uploader': 'Noize MC',
        },
    }, {
        'url': 'http://vk.com/video4643923_163339118',
        'file': '163339118.mp4',
        'md5': 'f79bccb5cd182b1f43502ca5685b2b36',
        'info_dict': {
            'uploader': 'Elvira Dzhonik',
            'title': 'Dream Theater - Hollow Years Live at Budokan 720*',
        },
    }]

    def _real_extract(self, url):
        """Extract the metadata and format list for a single vk.com video.

        Downloads the ``al_video.php`` page for the video id captured by
        ``_VALID_URL``.  If that page embeds a YouTube player, the embed URL
        is returned as a ``url_result`` for the ``Youtube`` extractor.
        Otherwise the ``var vars = {...};`` JSON object found in the page is
        parsed and turned into an info dict.

        :param url: a URL matching ``_VALID_URL``.
        :returns: either the ``url_result`` for an embedded YouTube video, or
            a dict with the keys ``id``, ``formats``, ``title``,
            ``thumbnail`` and ``uploader``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id
        info_page = self._download_webpage(info_url, video_id)

        # Dots are escaped so that only the literal youtube.com host matches.
        m_yt = re.search(r'src="(http://www\.youtube\.com/.*?)"', info_page)
        if m_yt is not None:
            self.to_screen('Youtube video detected')
            return self.url_result(m_yt.group(1), 'Youtube')

        data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars')
        data = json.loads(data_json)

        # Only keys of the exact form ``url<digits>`` describe a format.
        # Matching the digits explicitly (instead of slicing after a bare
        # ``startswith('url')``) keeps an unrelated key such as ``url`` or
        # ``url_foo`` from raising ValueError in int() and aborting the
        # whole extraction.
        formats = []
        for key, media_url in data.items():
            m_res = re.match(r'url(\d+)$', key)
            if m_res is None:
                continue
            formats.append({
                'format_id': key,
                'url': media_url,
                # NOTE(review): the numeric suffix is stored as 'width', as
                # before; it may actually denote the vertical resolution --
                # confirm against the site before renaming to 'height'.
                'width': int(m_res.group(1)),
            })
        self._sort_formats(formats)

        return {
            'id': compat_str(data['vid']),
            'formats': formats,
            'title': unescapeHTML(data['md_title']),
            # Thumbnail and uploader are optional in the vars object.
            'thumbnail': data.get('jpg'),
            'uploader': data.get('md_author'),
        }