]>
Commit | Line | Data |
---|---|---|
031ec536 | 1 | # coding: utf-8 |
617c0b22 | 2 | from __future__ import unicode_literals |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
1cc79574 | 7 | from ..utils import ( |
bccdac68 | 8 | decode_packed_codes, |
1cc79574 | 9 | ExtractorError, |
ceb33673 | 10 | int_or_none, |
bccdac68 | 11 | NO_DEFAULT, |
5c2266df | 12 | sanitized_Request, |
6e6bc8da | 13 | urlencode_postdata, |
5f28a1ac PP |
14 | ) |
15 | ||
617c0b22 | 16 | |
class XFileShareIE(InfoExtractor):
    """Extractor for the video hosts listed in ``_SITES``.

    All listed hosts are handled by the same page-scraping logic: fetch the
    video page, optionally re-submit its hidden form, then pull the media
    URL, title and thumbnail out of the resulting HTML/JavaScript.
    """

    # (domain, display name) pairs. Domains are used to build _VALID_URL,
    # display names are used to build IE_DESC.
    _SITES = (
        ('daclips.in', 'DaClips'),
        ('filehoot.com', 'FileHoot'),
        ('gorillavid.in', 'GorillaVid'),
        ('movpod.in', 'MovPod'),
        ('powerwatch.pw', 'PowerWatch'),
        ('rapidvideo.ws', 'Rapidvideo.ws'),
        ('thevideobee.to', 'TheVideoBee'),
        ('vidto.me', 'Vidto'),
        ('streamin.to', 'Streamin.To'),
        ('xvidstage.com', 'XVIDSTAGE'),
    )

    IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
    # Matches both plain (/<id>) and embed (/embed-<id>...) URLs on any of the
    # supported hosts; the id stops at the first non-alphanumeric character.
    _VALID_URL = (r'https?://(?P<host>(?:www\.)?(?:%s))/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
                  % '|'.join(re.escape(site) for site in list(zip(*_SITES))[0]))

    # Any of these appearing in the landing page means the video is gone.
    _FILE_NOT_FOUND_REGEXES = (
        r'>(?:404 - )?File Not Found<',
        r'>The file was removed by administrator<',
    )

    _TESTS = [{
        'url': 'http://gorillavid.in/06y9juieqpmi',
        'md5': '5ae4a3580620380619678ee4875893ba',
        'info_dict': {
            'id': '06y9juieqpmi',
            'ext': 'mp4',
            'title': 'Rebecca Black My Moment Official Music Video Reaction-6GK87Rc8bzQ',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:http://.*\.jpg',
        },
    }, {
        'url': 'http://gorillavid.in/embed-z08zf8le23c6-960x480.html',
        'only_matching': True,
    }, {
        'url': 'http://daclips.in/3rso4kdn6f9m',
        'md5': '1ad8fd39bb976eeb66004d3a4895f106',
        'info_dict': {
            'id': '3rso4kdn6f9m',
            'ext': 'mp4',
            'title': 'Micro Pig piglets ready on 16th July 2009-bG0PdrCdxUc',
            'thumbnail': r're:http://.*\.jpg',
        }
    }, {
        'url': 'http://movpod.in/0wguyyxi1yca',
        'only_matching': True,
    }, {
        'url': 'http://filehoot.com/3ivfabn7573c.html',
        'info_dict': {
            'id': '3ivfabn7573c',
            'ext': 'mp4',
            'title': 'youtube-dl test video \'äBaW_jenozKc.mp4.mp4',
            'thumbnail': r're:http://.*\.jpg',
        },
        'skip': 'Video removed',
    }, {
        'url': 'http://vidto.me/ku5glz52nqe1.html',
        'info_dict': {
            'id': 'ku5glz52nqe1',
            'ext': 'mp4',
            'title': 'test'
        }
    }, {
        'url': 'http://powerwatch.pw/duecjibvicbu',
        'info_dict': {
            'id': 'duecjibvicbu',
            'ext': 'mp4',
            'title': 'Big Buck Bunny trailer',
        },
    }, {
        'url': 'http://xvidstage.com/e0qcnl03co6z',
        'info_dict': {
            'id': 'e0qcnl03co6z',
            'ext': 'mp4',
            'title': 'Chucky Prank 2015.mp4',
        },
    }, {
        # removed by administrator
        'url': 'http://xvidstage.com/amfy7atlkx25',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract id, title, thumbnail and a single format for ``url``.

        Raises ExtractorError (expected) when the landing page matches one of
        ``_FILE_NOT_FOUND_REGEXES``. The final media-URL search is performed
        without a default, so a page with no recognizable media URL is
        reported by ``_search_regex`` itself.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Rebuild the URL from host + id so embed and suffixed variants all
        # resolve to the same page.
        url = 'http://%s/%s' % (mobj.group('host'), video_id)
        webpage = self._download_webpage(url, video_id)

        if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES):
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        fields = self._hidden_inputs(webpage)

        # Use .get(): a page without a hidden 'op' input must not abort with
        # a KeyError; in that case the landing page is scraped as-is.
        if fields.get('op') == 'download1':
            # Honour the on-page countdown, when present, before re-posting
            # the form.
            countdown = int_or_none(self._search_regex(
                r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>',
                webpage, 'countdown', default=None))
            if countdown:
                self._sleep(countdown, video_id)

            post = urlencode_postdata(fields)

            req = sanitized_Request(url, post)
            req.add_header('Content-type', 'application/x-www-form-urlencoded')

            webpage = self._download_webpage(req, video_id, 'Downloading video page')

        # Try the known title markups first, then fall back to the
        # OpenGraph title.
        title = (self._search_regex(
            [r'style="z-index: [0-9]+;">([^<]+)</span>',
             r'<td nowrap>([^<]+)</td>',
             r'h4-fine[^>]*>([^<]+)<',
             r'>Watch (.+) ',
             r'<h2 class="video-page-head">([^<]+)</h2>'],
            webpage, 'title', default=None) or self._og_search_title(webpage)).strip()

        def extract_video_url(default=NO_DEFAULT):
            # Reads the *current* value of ``webpage`` from the enclosing
            # scope, so it picks up the decoded script assigned below.
            return self._search_regex(
                (r'file\s*:\s*(["\'])(?P<url>http.+?)\1,',
                 r'file_link\s*=\s*(["\'])(?P<url>http.+?)\1',
                 r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http.+?)\2\)',
                 r'<embed[^>]+src=(["\'])(?P<url>http.+?)\1'),
                webpage, 'file url', default=default, group='url')

        video_url = extract_video_url(default=None)

        if not video_url:
            # No plain URL found: locate the packed script block, decode it
            # and search the decoded text instead (this time without a
            # default, so failure is reported).
            webpage = decode_packed_codes(self._search_regex(
                r"(}\('(.+)',(\d+),(\d+),'[^']*\b(?:file|embed)\b[^']*'\.split\('\|'\))",
                webpage, 'packed code'))
            video_url = extract_video_url()

        # NOTE: after the fallback above ``webpage`` holds the decoded script,
        # so the thumbnail is searched there rather than in the HTML page.
        thumbnail = self._search_regex(
            r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', default=None)

        formats = [{
            'format_id': 'sd',
            'url': video_url,
            'quality': 1,
        }]

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }