]>
Commit | Line | Data |
---|---|---|
426ff042 PH |
1 | import re |
2 | import os | |
3 | import socket | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
7 | compat_http_client, | |
8 | compat_str, | |
9 | compat_urllib_error, | |
10 | compat_urllib_parse, | |
11 | compat_urllib_request, | |
12 | ||
13 | ExtractorError, | |
14 | ) | |
15 | ||
16 | ||
class DepositFilesIE(InfoExtractor):
    """Information extractor for depositfiles.com."""

    _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'

    def _real_extract(self, url):
        """Extract the direct download URL and title for a depositfiles file.

        Returns a single-entry list of legacy info dicts. Raises
        ExtractorError when the page cannot be fetched or when the site
        reports a download restriction instead of a download form.
        """
        file_id = url.split('/')[-1]
        # Rebuild url in english locale so the page markup is predictable
        url = 'http://depositfiles.com/en/files/' + file_id

        # Retrieve file webpage with 'Free download' button pressed.
        free_download_indication = {'gateway_result': '1'}
        # urlencode() returns text; Request POST data must be bytes on
        # Python 3 (ASCII-safe here: the payload is a fixed key/value pair).
        data = compat_urllib_parse.urlencode(free_download_indication).encode('ascii')
        request = compat_urllib_request.Request(url, data)
        try:
            self.report_download_webpage(file_id)
            # urlopen().read() yields bytes on Python 3; decode once so all
            # subsequent regex searches operate on text.
            webpage = compat_urllib_request.urlopen(request).read().decode('utf-8', 'replace')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err))

        # Search for the real file URL
        mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
        if mobj is None:
            # No download form — try to surface the site's restriction
            # message (e.g. free-download limits) as the error reason.
            mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
            if mobj is not None:
                # Collapse the multi-line notice into a single line.
                restriction_message = re.sub(r'\s+', ' ', mobj.group(1)).strip()
                raise ExtractorError(u'%s' % restriction_message)
            else:
                raise ExtractorError(u'Unable to extract download URL from: %s' % url)

        file_url = mobj.group(1)
        file_extension = os.path.splitext(file_url)[1][1:]

        # Search for file title
        file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title')

        return [{
            'id': file_id,
            'url': file_url,
            'uploader': None,
            'upload_date': None,
            'title': file_title,
            'ext': file_extension,
        }]