]>
Commit | Line | Data |
---|---|---|
617c0b22 | 1 | import re |
2 | ||
3 | from .common import InfoExtractor | |
1cc79574 | 4 | from ..utils import ( |
ac668111 | 5 | ExtractorError, |
bccdac68 | 6 | decode_packed_codes, |
2cd668ee | 7 | determine_ext, |
ceb33673 | 8 | int_or_none, |
59296bae | 9 | js_to_json, |
6e6bc8da | 10 | urlencode_postdata, |
5f28a1ac PP |
11 | ) |
12 | ||
617c0b22 | 13 | |
59296bae RA |
14 | # based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58 |
def aa_decode(aa_code):
    """Decode emoticon-obfuscated text into a plain string.

    The input is split on a fixed emoticon delimiter. Within each chunk the
    known emoticon groups are replaced by the digits 0-7 and every '+ '
    joiner is removed. A chunk that then starts with digits is read as an
    octal code point; otherwise a chunk starting with 'u' followed by
    lowercase hex digits is read as a hexadecimal code point. Chunks that
    match neither form contribute nothing to the result.
    """
    # Order matters: compound emoticons have to be replaced before the
    # simple ones they are built from.
    replacements = (
        ('7', '((゚ー゚) + (o^_^o))'),
        ('6', '((o^_^o) +(o^_^o))'),
        ('5', '((゚ー゚) + (゚Θ゚))'),
        ('2', '((o^_^o) - (゚Θ゚))'),
        ('4', '(゚ー゚)'),
        ('3', '(o^_^o)'),
        ('1', '(゚Θ゚)'),
        ('0', '(c^_^o)'),
    )
    separator = '(゚Д゚)[゚ε゚]+'

    decoded = []
    for chunk in aa_code.split(separator):
        for digit, emoticon in replacements:
            chunk = chunk.replace(emoticon, digit)
        chunk = chunk.replace('+ ', '')

        octal = re.match(r'^\d+', chunk)
        if octal:
            decoded.append(chr(int(octal.group(0), 8)))
            continue

        hexadecimal = re.match(r'^u([\da-f]+)', chunk)
        if hexadecimal:
            decoded.append(chr(int(hexadecimal.group(1), 16)))
    return ''.join(decoded)
40 | ||
41 | ||
class XFileShareIE(InfoExtractor):
    # Each entry pairs a host regex with the display name of the site.
    # IE_DESC, _VALID_URL and _EMBED_REGEX below are all derived from it.
    _SITES = (
        (r'aparat\.cam', 'Aparat'),
        (r'clipwatching\.com', 'ClipWatching'),
        (r'gounlimited\.to', 'GoUnlimited'),
        (r'govid\.me', 'GoVid'),
        (r'holavid\.com', 'HolaVid'),
        (r'streamty\.com', 'Streamty'),
        (r'thevideobee\.to', 'TheVideoBee'),
        (r'uqload\.com', 'Uqload'),
        (r'vidbom\.com', 'VidBom'),
        (r'vidlo\.us', 'vidlo'),
        (r'vidlocker\.xyz', 'VidLocker'),
        (r'vidshare\.tv', 'VidShare'),
        (r'vup\.to', 'VUp'),
        (r'wolfstream\.tv', 'WolfStream'),
        (r'xvideosharing\.com', 'XVideoSharing'),
    )

    IE_DESC = 'XFileShare based sites: %s' % ', '.join(name for _, name in _SITES)
    _VALID_URL = (
        r'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
        % '|'.join(host_re for host_re, _ in _SITES))
    _EMBED_REGEX = [
        r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1'
        % '|'.join(host_re for host_re, _ in _SITES),
    ]

    # Page markers that mean the requested file is gone.
    _FILE_NOT_FOUND_REGEXES = (
        r'>(?:404 - )?File Not Found<',
        r'>The file was removed by administrator<',
    )

    _TESTS = [{
        'url': 'https://uqload.com/dltx1wztngdz',
        'md5': '3cfbb65e4c90e93d7b37bcb65a595557',
        'info_dict': {
            'id': 'dltx1wztngdz',
            'ext': 'mp4',
            'title': 'Rick Astley Never Gonna Give You mp4',
            'thumbnail': r're:https://.*\.jpg'
        }
    }, {
        'url': 'http://xvideosharing.com/fq65f94nd2ve',
        'md5': '4181f63957e8fe90ac836fa58dc3c8a6',
        'info_dict': {
            'id': 'fq65f94nd2ve',
            'ext': 'mp4',
            'title': 'sample',
            'thumbnail': r're:http://.*\.jpg',
        },
    }, {
        'url': 'https://aparat.cam/n4d6dh0wvlpr',
        'only_matching': True,
    }, {
        'url': 'https://wolfstream.tv/nthme29v9u2x',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Steps: fetch the (possibly embed) page, submit the 'download1' form
        # when one is present, de-obfuscate inline scripts, then collect
        # formats from the jwplayer config or, failing that, from URL regexes.
        host, video_id = self._match_valid_url(url).group('host', 'id')

        # govid.me and vidlo.us are requested through their embed page
        if host in ('govid.me', 'vidlo.us'):
            page_path = 'embed-%s.html' % video_id
        else:
            page_path = video_id
        url = 'https://%s/%s' % (host, page_path)
        webpage = self._download_webpage(url, video_id)

        for not_found_re in self._FILE_NOT_FOUND_REGEXES:
            if re.search(not_found_re, webpage):
                raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        form_fields = self._hidden_inputs(webpage)

        if form_fields.get('op') == 'download1':
            # Honour the countdown shown on the page before posting the form
            wait_seconds = int_or_none(self._search_regex(
                r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>',
                webpage, 'countdown', default=None))
            if wait_seconds:
                self._sleep(wait_seconds, video_id)

            post_headers = {
                'Referer': url,
                'Content-type': 'application/x-www-form-urlencoded',
            }
            webpage = self._download_webpage(
                url, video_id, 'Downloading video page',
                data=urlencode_postdata(form_fields), headers=post_headers)

        title_patterns = (
            r'style="z-index: [0-9]+;">([^<]+)</span>',
            r'<td nowrap>([^<]+)</td>',
            r'h4-fine[^>]*>([^<]+)<',
            r'>Watch (.+)[ <]',
            r'<h2 class="video-page-head">([^<]+)</h2>',
            r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<',  # streamin.to
            r'title\s*:\s*"([^"]+)"',  # govid.me
        )
        # Title preference: page markup, then OpenGraph, then the bare id
        title = self._search_regex(title_patterns, webpage, 'title', default=None)
        if not title:
            title = self._og_search_title(webpage, default=None)
        title = (title or video_id).strip()

        # Swap obfuscated script code for its decoded form so the regexes
        # below can see the player configuration.
        deobfuscators = (
            (r'(eval\(function\(p,a,c,k,e,d\){.+)', decode_packed_codes),
            (r'(゚.+)', aa_decode),
        )
        for obf_re, decoder in deobfuscators:
            obf_code = self._search_regex(obf_re, webpage, 'obfuscated code', default=None)
            if not obf_code:
                continue
            webpage = webpage.replace(obf_code, decoder(obf_code))

        formats = []

        jwplayer_patterns = [
            r'jwplayer\("[^"]+"\)\.load\(\[({.+?})\]\);',
            r'jwplayer\("[^"]+"\)\.setup\(({.+?})\);',
        ]
        jwplayer_data = self._search_regex(
            jwplayer_patterns, webpage, 'jwplayer data', default=None)
        if jwplayer_data:
            jwplayer_data = self._parse_json(
                jwplayer_data.replace(r"\'", "'"), video_id, js_to_json)
            if jwplayer_data:
                jwplayer_info = self._parse_jwplayer_data(
                    jwplayer_data, video_id, False,
                    m3u8_id='hls', mpd_id='dash')
                formats = jwplayer_info['formats']

        if not formats:
            # Fallback: scrape direct media URLs, keeping first-seen order
            url_patterns = (
                r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
                r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
                r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
                r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
            )
            urls = []
            for url_re in url_patterns:
                for found in re.finditer(url_re, webpage):
                    candidate = found.group('url')
                    if candidate in urls:
                        continue
                    urls.append(candidate)

            sources = self._search_regex(
                r'sources\s*:\s*(\[(?!{)[^\]]+\])', webpage, 'sources', default=None)
            if sources:
                urls.extend(self._parse_json(sources, video_id))

            formats = []
            for video_url in urls:
                if determine_ext(video_url) != 'm3u8':
                    formats.append({
                        'url': video_url,
                        'format_id': 'sd',
                    })
                    continue
                formats.extend(self._extract_m3u8_formats(
                    video_url, video_id, 'mp4',
                    entry_protocol='m3u8_native', m3u8_id='hls',
                    fatal=False))
        self._sort_formats(formats)

        thumbnail_patterns = [
            r'<video[^>]+poster="([^"]+)"',
            r'(?:image|poster)\s*:\s*["\'](http[^"\']+)["\'],',
        ]
        thumbnail = self._search_regex(
            thumbnail_patterns, webpage, 'thumbnail', default=None)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
            'http_headers': {'Referer': url}
        }