]>
Commit | Line | Data |
---|---|---|
aec74dd9 PH |
1 | from __future__ import unicode_literals |
2 | ||
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
7 | compat_urllib_parse_urlparse, | |
8 | parse_duration, | |
9 | qualities, | |
10 | ) | |
11 | ||
12 | ||
class VuClipIE(InfoExtractor):
    """Information extractor for vuclip.com watch pages.

    Matches both the bare ``vuclip.com`` host and the ``m.`` host, over
    http or https.  The numeric ``cid`` query parameter is used as the
    video id.
    """

    _VALID_URL = r'https?://(?:m\.)?vuclip\.com/w\?.*?cid=(?P<id>[0-9]+)'

    _TEST = {
        'url': 'http://m.vuclip.com/w?cid=843902317&fid=63532&z=1007&nvar&frm=index.html&bu=4757321434',
        'md5': '92ac9d1ccefec4f0bb474661ab144fcf',
        'info_dict': {
            'id': '843902317',
            'ext': '3gp',
            'title': 'Movie Trailer: Noah',
            'duration': 139,
        }
    }

    def _real_extract(self, url):
        """Extract metadata and download formats for a single watch page.

        url -- page URL; expected to match ``_VALID_URL``.

        Returns a dict with the keys ``id``, ``formats``, ``title`` and
        ``duration``.  ``duration`` is looked up non-fatally, so a page
        without it does not abort the extraction.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        # The first response may be an interstitial carrying a "No..." button
        # whose onClick target leads to the actual video page.
        ad_m = re.search(
            r'''value="No.*?" onClick="location.href='([^"']+)'"''', webpage)
        if ad_m:
            # The captured target is appended to scheme://host of the
            # requested URL, i.e. it is treated as a host-relative path.
            urlr = compat_urllib_parse_urlparse(url)
            adfree_url = urlr.scheme + '://' + urlr.netloc + ad_m.group(1)
            webpage = self._download_webpage(
                adfree_url, video_id, note='Download post-ad page')

        # The download links sit between the "social" div and the next <hr>.
        links_code = self._search_regex(
            r'(?s)<div class="social align_c".*?>(.*?)<hr\s*/?>', webpage,
            'links')
        title = self._html_search_regex(
            r'<title>(.*?)-\s*Vuclip</title>', webpage, 'title').strip()

        # NOTE(review): presumably later entries rank higher ('Hi' over
        # 'Reg') -- confirm against utils.qualities.
        quality_order = qualities(['Reg', 'Hi'])
        formats = []
        # A distinct loop name keeps the ``url`` parameter from being rebound.
        for format_url, q in re.findall(
                r'<a href="(?P<url>[^"]+)".*?>(?P<q>[^<]+)</a>', links_code):
            # e.g. "<scheme>-<link text>", so links that differ only in
            # protocol still get distinct format ids.
            format_id = (
                compat_urllib_parse_urlparse(format_url).scheme + '-' + q)
            formats.append({
                'format_id': format_id,
                'url': format_url,
                'quality': quality_order(q),
            })
        self._sort_formats(formats)

        # Duration appears as "(H:MM:SS)" / "(MM:SS)" at the end of the <h1>.
        duration = parse_duration(self._search_regex(
            r'\(([0-9:]+)\)</span></h1>', webpage, 'duration', fatal=False))

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'duration': duration,
        }