]>
Commit | Line | Data |
---|---|---|
ac668111 | 1 | import urllib.parse |
2 | ||
916c1452 | 3 | from .common import InfoExtractor |
ac668111 | 4 | from ..compat import compat_b64decode |
916c1452 | 5 | from ..utils import ( |
ac668111 | 6 | KNOWN_EXTENSIONS, |
916c1452 | 7 | ExtractorError, |
ac668111 | 8 | determine_ext, |
916c1452 | 9 | int_or_none, |
edc2a1f6 | 10 | js_to_json, |
e438e814 | 11 | parse_filesize, |
edc2a1f6 | 12 | rot47, |
96a91b15 | 13 | url_or_none, |
6e6bc8da | 14 | urlencode_postdata, |
916c1452 S |
15 | ) |
16 | ||
17 | ||
9dc13a67 S |
class SharedBaseIE(InfoExtractor):
    """Common extraction flow shared by the extractors in this module.

    Subclasses are expected to provide ``_FILE_NOT_FOUND`` (a marker string
    looked up in the landing page) and ``_extract_video_url``; both are used
    below but are not defined on this class.
    """

    def _real_extract(self, url):
        """Download the landing page for *url* and build the info dict.

        Raises ExtractorError (marked as expected) when the page contains
        the subclass's ``_FILE_NOT_FOUND`` marker.
        """
        video_id = self._match_id(url)

        # Only the page body is used; the response handle is discarded.
        webpage, _ = self._download_webpage_handle(url, video_id)

        if self._FILE_NOT_FOUND in webpage:
            raise ExtractorError(
                'Video %s does not exist' % video_id, expected=True)

        video_url = self._extract_video_url(webpage, video_id, url)

        title = self._extract_title(webpage)
        filesize = int_or_none(self._extract_filesize(webpage))

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'filesize': filesize,
            'title': title,
        }

    def _extract_title(self, webpage):
        """Return the title decoded from the base64 ``full:title`` meta tag."""
        # Ask for a fatal lookup explicitly so that a missing tag surfaces
        # as an extraction error instead of handing None to the base64
        # decoder.
        encoded_title = self._html_search_meta(
            'full:title', webpage, 'title', fatal=True)
        return compat_b64decode(encoded_title).decode('utf-8')

    def _extract_filesize(self, webpage):
        """Return the raw ``full:size`` meta value (non-fatal lookup)."""
        return self._html_search_meta(
            'full:size', webpage, 'file size', fatal=False)
48 | ||
9dc13a67 S |
49 | |
class SharedIE(SharedBaseIE):
    """Extractor for shared.sx pages."""

    IE_DESC = 'shared.sx'
    _VALID_URL = r'https?://shared\.sx/(?P<id>[\da-z]{10})'
    _FILE_NOT_FOUND = '>File does not exist<'

    _TEST = {
        'url': 'http://shared.sx/0060718775',
        'md5': '106fefed92a8a2adb8c98e6a0652f49b',
        'info_dict': {
            'id': '0060718775',
            'ext': 'mp4',
            'title': 'Bmp4',
            'filesize': 1720110,
        },
    }

    def _extract_video_url(self, webpage, video_id, url):
        """Re-submit the page's hidden form and read ``data-url`` from the reply.

        The hidden inputs found in *webpage* are POSTed back to *url*; the
        media URL is taken from the quoted ``data-url`` attribute of the
        returned page.
        """
        post_headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': url,
        }
        form_fields = self._hidden_inputs(webpage)

        player_page = self._download_webpage(
            url, video_id, 'Downloading video page',
            data=urlencode_postdata(form_fields),
            headers=post_headers)

        return self._html_search_regex(
            r'data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
            player_page, 'video URL', group='url')
82 | ||
83 | ||
class VivoIE(SharedBaseIE):
    """Extractor for vivo.sx / vivo.st pages."""

    IE_DESC = 'vivo.sx'
    _VALID_URL = r'https?://vivo\.s[xt]/(?P<id>[\da-z]{10})'
    _FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed'

    _TESTS = [{
        'url': 'http://vivo.sx/d7ddda0e78',
        'md5': '15b3af41be0b4fe01f4df075c2678b2c',
        'info_dict': {
            'id': 'd7ddda0e78',
            'ext': 'mp4',
            'title': 'Chicken',
            'filesize': 515659,
        },
    }, {
        'url': 'http://vivo.st/d7ddda0e78',
        'only_matching': True,
    }]

    def _extract_title(self, webpage):
        """Return the title from ``data-name``, else the Open Graph title.

        When the ``data-name`` value ends in a known media extension, that
        extension is stripped from the returned title.
        """
        title = self._html_search_regex(
            r'data-name\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', webpage,
            'title', default=None, group='title')
        if not title:
            return self._og_search_title(webpage)

        ext = determine_ext(title)
        if ext.lower() in KNOWN_EXTENSIONS:
            title = title.rpartition('.' + ext)[0]
        return title

    def _extract_filesize(self, webpage):
        """Parse the human-readable size shown next to the "Watch" link."""
        return parse_filesize(self._search_regex(
            r'data-type=["\']video["\'][^>]*>Watch.*?<strong>\s*\((.+?)\)',
            webpage, 'filesize', fatal=False))

    def _extract_video_url(self, webpage, video_id, url):
        """Return the media URL, trying the legacy encoding first.

        The legacy scheme stores a base64-encoded URL in ``data-stream``.
        If that attribute is absent, undecodable, or does not decode to a
        URL, the ``source`` field of the ``InitializeStream({...})`` call
        is used instead (URL-unquoted, then ROT47-decoded).
        """
        encoded_stream = self._search_regex(
            r'data-stream\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
            'stream url', default=None, group='url')
        if encoded_stream:
            try:
                decoded = compat_b64decode(encoded_stream).decode('utf-8')
            except ValueError:
                # Best-effort path: a value that is not valid base64/UTF-8
                # must not abort extraction before the fallback is tried.
                # (Assumes compat_b64decode fails like base64.b64decode;
                # binascii.Error and UnicodeDecodeError are ValueErrors.)
                decoded = None
            stream_url = url_or_none(decoded) if decoded else None
            if stream_url:
                return stream_url

        stream = self._parse_json(
            self._search_regex(
                r'(?s)InitializeStream\s*\(\s*({.+?})\s*\)\s*;', webpage,
                'stream'),
            video_id, transform_source=js_to_json)
        return rot47(urllib.parse.unquote_plus(stream['source']))