]>
Commit | Line | Data |
---|---|---|
1 | from __future__ import unicode_literals | |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..compat import ( | |
5 | compat_b64decode, | |
6 | compat_urllib_parse_unquote_plus, | |
7 | ) | |
8 | from ..utils import ( | |
9 | determine_ext, | |
10 | ExtractorError, | |
11 | int_or_none, | |
12 | js_to_json, | |
13 | KNOWN_EXTENSIONS, | |
14 | parse_filesize, | |
15 | rot47, | |
16 | url_or_none, | |
17 | urlencode_postdata, | |
18 | ) | |
19 | ||
20 | ||
class SharedBaseIE(InfoExtractor):
    """Common extraction flow shared by the extractors in this module.

    Subclasses are expected to provide:
      * _FILE_NOT_FOUND: marker string whose presence in the page means
        the video is gone;
      * _extract_video_url(webpage, video_id, url): returns the media URL.
    They may also override _extract_title and _extract_filesize.
    """

    def _real_extract(self, url):
        """Download the page at `url` and build the info dict for it.

        Raises ExtractorError (expected=True) when the page contains the
        subclass' _FILE_NOT_FOUND marker.
        """
        video_id = self._match_id(url)

        # Only the page body is needed; the response handle was never used.
        webpage = self._download_webpage(url, video_id)

        if self._FILE_NOT_FOUND in webpage:
            raise ExtractorError(
                'Video %s does not exist' % video_id, expected=True)

        video_url = self._extract_video_url(webpage, video_id, url)

        title = self._extract_title(webpage)
        # File size is optional; int_or_none tolerates a missing value.
        filesize = int_or_none(self._extract_filesize(webpage))

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'filesize': filesize,
            'title': title,
        }

    def _extract_title(self, webpage):
        """Return the title stored base64-encoded in the 'full:title' meta.

        The title is mandatory, so the meta lookup is fatal: without
        fatal=True a missing tag would produce None and the base64 decoding
        below would fail with an unrelated error instead of a clean
        extractor error.
        """
        encoded_title = self._html_search_meta(
            'full:title', webpage, 'title', fatal=True)
        return compat_b64decode(encoded_title).decode('utf-8')

    def _extract_filesize(self, webpage):
        """Return the raw 'full:size' meta content, or None if absent."""
        return self._html_search_meta(
            'full:size', webpage, 'file size', fatal=False)
51 | ||
52 | ||
class SharedIE(SharedBaseIE):
    """Extractor for shared.sx pages."""

    IE_DESC = 'shared.sx'
    _VALID_URL = r'https?://shared\.sx/(?P<id>[\da-z]{10})'
    _FILE_NOT_FOUND = '>File does not exist<'

    _TEST = {
        'url': 'http://shared.sx/0060718775',
        'md5': '106fefed92a8a2adb8c98e6a0652f49b',
        'info_dict': {
            'id': '0060718775',
            'ext': 'mp4',
            'title': 'Bmp4',
            'filesize': 1720110,
        },
    }

    def _extract_video_url(self, webpage, video_id, url):
        """Return the media URL found on the page served after form submit.

        The hidden inputs of `webpage` are posted back to the same `url`;
        the response is then searched for a quoted `data-url` attribute.
        """
        # Re-submit the page's hidden form fields as a urlencoded POST body.
        post_body = urlencode_postdata(self._hidden_inputs(webpage))
        request_headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': url,
        }

        player_page = self._download_webpage(
            url, video_id, 'Downloading video page',
            data=post_body, headers=request_headers)

        return self._html_search_regex(
            r'data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
            player_page, 'video URL', group='url')
85 | ||
86 | ||
class VivoIE(SharedBaseIE):
    """Extractor for vivo.sx / vivo.st pages."""

    IE_DESC = 'vivo.sx'
    _VALID_URL = r'https?://vivo\.s[xt]/(?P<id>[\da-z]{10})'
    _FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed'

    _TESTS = [{
        'url': 'http://vivo.sx/d7ddda0e78',
        'md5': '15b3af41be0b4fe01f4df075c2678b2c',
        'info_dict': {
            'id': 'd7ddda0e78',
            'ext': 'mp4',
            'title': 'Chicken',
            'filesize': 515659,
        },
    }, {
        'url': 'http://vivo.st/d7ddda0e78',
        'only_matching': True,
    }]

    def _extract_title(self, webpage):
        """Return the title from `data-name`, else the OpenGraph title.

        When the `data-name` value ends in a known media extension, that
        extension is stripped from the returned title.
        """
        name = self._html_search_regex(
            r'data-name\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', webpage,
            'title', default=None, group='title')
        if not name:
            # No usable data-name attribute: fall back to og:title.
            return self._og_search_title(webpage)

        extension = determine_ext(name)
        if extension.lower() in KNOWN_EXTENSIONS:
            name = name.rpartition('.' + extension)[0]
        return name

    def _extract_filesize(self, webpage):
        """Return the size parsed from the text in parentheses after 'Watch'.

        The regex lookup is non-fatal, so its result may be None before it
        is handed to parse_filesize.
        """
        size_text = self._search_regex(
            r'data-type=["\']video["\'][^>]*>Watch.*?<strong>\s*\((.+?)\)',
            webpage, 'filesize', fatal=False)
        return parse_filesize(size_text)

    def _extract_video_url(self, webpage, video_id, url):
        """Return the media URL decoded from the page.

        Two encodings are tried in order:
          1. a base64-encoded `data-stream` attribute;
          2. the `source` field of the object passed to InitializeStream(),
             URL-unquoted and then passed through rot47.
        """
        encoded_stream = self._search_regex(
            r'data-stream\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
            'stream url', default=None, group='url')
        if encoded_stream:
            # Older scheme: the attribute holds the base64-encoded URL.
            legacy_url = url_or_none(
                compat_b64decode(encoded_stream).decode('utf-8'))
            if legacy_url:
                return legacy_url

        # Newer scheme: read the JS object literal given to InitializeStream.
        stream_literal = self._search_regex(
            r'(?s)InitializeStream\s*\(\s*({.+?})\s*\)\s*;', webpage,
            'stream')
        stream = self._parse_json(
            stream_literal, video_id, transform_source=js_to_json)
        return rot47(compat_urllib_parse_unquote_plus(stream['source']))
139 | r'(?s)InitializeStream\s*\(\s*({.+?})\s*\)\s*;', webpage, | |
140 | 'stream'), | |
141 | video_id, transform_source=js_to_json)['source']) |