]>
Commit | Line | Data |
---|---|---|
916c1452 S |
1 | from __future__ import unicode_literals |
2 | ||
916c1452 | 3 | from .common import InfoExtractor |
edc2a1f6 S |
4 | from ..compat import ( |
5 | compat_b64decode, | |
6 | compat_urllib_parse_unquote_plus, | |
7 | ) | |
916c1452 | 8 | from ..utils import ( |
e438e814 | 9 | determine_ext, |
916c1452 | 10 | ExtractorError, |
916c1452 | 11 | int_or_none, |
edc2a1f6 | 12 | js_to_json, |
e438e814 S |
13 | KNOWN_EXTENSIONS, |
14 | parse_filesize, | |
edc2a1f6 | 15 | rot47, |
96a91b15 | 16 | url_or_none, |
6e6bc8da | 17 | urlencode_postdata, |
916c1452 S |
18 | ) |
19 | ||
20 | ||
9dc13a67 S |
class SharedBaseIE(InfoExtractor):
    """Common extraction flow for the extractors defined in this module.

    This base class does not define everything it uses. Concrete subclasses
    are expected to provide:

    * ``_VALID_URL`` with an ``id`` group (consumed by ``_match_id``),
    * ``_FILE_NOT_FOUND``, a marker string whose presence in the downloaded
      page means the video is gone,
    * ``_extract_video_url(webpage, video_id, url)`` returning the media URL.
    """

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict for it.

        Raises ExtractorError (expected) when the page contains the
        subclass' ``_FILE_NOT_FOUND`` marker.
        """
        video_id = self._match_id(url)

        # Only the page body is needed; the response handle was never used.
        webpage = self._download_webpage(url, video_id)

        if self._FILE_NOT_FOUND in webpage:
            raise ExtractorError(
                'Video %s does not exist' % video_id, expected=True)

        video_url = self._extract_video_url(webpage, video_id, url)

        title = self._extract_title(webpage)
        filesize = int_or_none(self._extract_filesize(webpage))

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'filesize': filesize,
            'title': title,
        }

    def _extract_title(self, webpage):
        """Return the title decoded from the base64 ``full:title`` meta tag."""
        # The title is mandatory, so make the lookup fatal: a missing tag then
        # surfaces as a regular extraction error instead of a TypeError raised
        # by compat_b64decode(None).
        encoded_title = self._html_search_meta(
            'full:title', webpage, 'title', fatal=True)
        return compat_b64decode(encoded_title).decode('utf-8')

    def _extract_filesize(self, webpage):
        """Return the raw ``full:size`` meta value, or None if it is absent."""
        return self._html_search_meta(
            'full:size', webpage, 'file size', fatal=False)
51 | ||
9dc13a67 S |
52 | |
class SharedIE(SharedBaseIE):
    """Extractor for shared.sx pages."""

    IE_DESC = 'shared.sx'
    _VALID_URL = r'https?://shared\.sx/(?P<id>[\da-z]{10})'
    _FILE_NOT_FOUND = '>File does not exist<'

    _TEST = {
        'url': 'http://shared.sx/0060718775',
        'md5': '106fefed92a8a2adb8c98e6a0652f49b',
        'info_dict': {
            'id': '0060718775',
            'ext': 'mp4',
            'title': 'Bmp4',
            'filesize': 1720110,
        },
    }

    def _extract_video_url(self, webpage, video_id, url):
        """Re-submit the page's hidden form and read the media URL.

        The hidden inputs found in *webpage* are POSTed back to *url*; the
        media URL is then taken from the ``data-url`` attribute of the
        returned page.
        """
        request_headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': url,
        }
        form_fields = self._hidden_inputs(webpage)
        post_body = urlencode_postdata(form_fields)

        player_page = self._download_webpage(
            url, video_id, 'Downloading video page',
            data=post_body, headers=request_headers)

        return self._html_search_regex(
            r'data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
            player_page, 'video URL', group='url')
85 | ||
86 | ||
class VivoIE(SharedBaseIE):
    """Extractor for vivo.sx pages."""

    IE_DESC = 'vivo.sx'
    _VALID_URL = r'https?://vivo\.sx/(?P<id>[\da-z]{10})'
    _FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed'

    _TEST = {
        'url': 'http://vivo.sx/d7ddda0e78',
        'md5': '15b3af41be0b4fe01f4df075c2678b2c',
        'info_dict': {
            'id': 'd7ddda0e78',
            'ext': 'mp4',
            'title': 'Chicken',
            'filesize': 515659,
        },
    }

    def _extract_title(self, webpage):
        """Return the title from ``data-name``, else the OpenGraph title.

        A trailing known media extension (e.g. ``.mp4``) is stripped from the
        ``data-name`` value.
        """
        title = self._html_search_regex(
            r'data-name\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', webpage,
            'title', default=None, group='title')
        if title:
            ext = determine_ext(title)
            if ext.lower() in KNOWN_EXTENSIONS:
                title = title.rpartition('.' + ext)[0]
            return title
        return self._og_search_title(webpage)

    def _extract_filesize(self, webpage):
        """Return the size parsed from the text in parentheses after "Watch"."""
        return parse_filesize(self._search_regex(
            r'data-type=["\']video["\'][^>]*>Watch.*?<strong>\s*\((.+?)\)',
            webpage, 'filesize', fatal=False))

    def _extract_video_url(self, webpage, video_id, url):
        """Return the media URL embedded in *webpage*.

        Two encodings are tried in order:

        1. a base64-encoded ``data-stream`` attribute;
        2. the ``source`` field of the object passed to ``InitializeStream``,
           which is URL-unquoted and then ROT47-decoded.
        """
        def decode_url_old(encoded_url):
            return compat_b64decode(encoded_url).decode('utf-8')

        stream_url = self._search_regex(
            r'data-stream\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
            'stream url', default=None, group='url')
        if stream_url:
            try:
                stream_url = url_or_none(decode_url_old(stream_url))
            except (TypeError, ValueError):
                # The attribute is not valid base64 / UTF-8 (binascii.Error
                # and UnicodeDecodeError are ValueError subclasses; Python 2
                # raises TypeError). Fall through to the newer encoding
                # instead of aborting the extraction.
                stream_url = None
            if stream_url:
                return stream_url

        def decode_url(encoded_url):
            return rot47(compat_urllib_parse_unquote_plus(encoded_url))

        return decode_url(self._parse_json(
            self._search_regex(
                r'(?s)InitializeStream\s*\(\s*({.+?})\s*\)\s*;', webpage,
                'stream'),
            video_id, transform_source=js_to_json)['source'])