]>
Commit | Line | Data |
---|---|---|
66aa382e NJ |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | from ..utils import ( | |
5 | ExtractorError, | |
6 | compat_urllib_parse, | |
7 | compat_urllib_request, | |
798a2cad | 8 | determine_ext, |
66aa382e NJ |
9 | ) |
10 | import re | |
11 | ||
12 | from .common import InfoExtractor | |
13 | ||
14 | ||
class SockshareIE(InfoExtractor):
    """Information extractor for files hosted on sockshare.com.

    The file page is guarded by a "Continue as Free User" confirmation
    form, so extraction takes two requests: a GET that yields the hidden
    form hash, and a POST that submits it and returns the page from which
    the download link, title and thumbnail are scraped.
    """

    _VALID_URL = r'https?://(?:www\.)?sockshare\.com/file/(?P<id>[0-9A-Za-z]+)'
    _FILE_DELETED_REGEX = r'This file doesn\'t exist, or has been removed\.</div>'
    _TEST = {
        'url': 'http://www.sockshare.com/file/437BE28B89D799D7',
        'md5': '9d0bf1cfb6dbeaa8d562f6c97506c5bd',
        'info_dict': {
            'id': '437BE28B89D799D7',
            'title': 'big_buck_bunny_720p_surround.avi',
            'ext': 'avi',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^http://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Extract the download URL and metadata for a sockshare file page.

        Args:
            url: A URL matching ``_VALID_URL``.

        Returns:
            A dict with ``id``, ``title``, ``thumbnail`` (``None`` when the
            page has no recognisable thumbnail) and a single-entry
            ``formats`` list.

        Raises:
            ExtractorError: if the page reports that the file does not
                exist, or if the confirmation hash, the download link or
                the title cannot be found.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Rebuild the URL from the id so both requests hit the same address.
        url = 'http://sockshare.com/file/%s' % video_id
        webpage = self._download_webpage(url, video_id)

        if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
            raise ExtractorError(
                'Video %s does not exist' % video_id, expected=True)

        confirm_hash = self._html_search_regex(r'''(?x)<input\s+
            type="hidden"\s+
            value="([^"]*)"\s+
            name="hash"
            ''', webpage, 'hash')

        fields = {
            "hash": confirm_hash,
            "confirm": "Continue as Free User"
        }

        # urlencode() output is pure ASCII; the request body must be bytes
        # on Python 3, and encoding is a no-op in effect on Python 2.
        post = compat_urllib_parse.urlencode(fields).encode('ascii')
        req = compat_urllib_request.Request(url, post)
        # Apparently, this header is required for confirmation to work.
        req.add_header('Host', 'www.sockshare.com')
        req.add_header('Content-type', 'application/x-www-form-urlencoded')

        webpage = self._download_webpage(
            req, video_id, 'Downloading video page')

        video_url = self._html_search_regex(
            r'<a href="([^"]*)".+class="download_file_link"',
            webpage, 'file url')
        # The page links to the file with a site-relative path.
        video_url = "http://www.sockshare.com" + video_url

        # The title is required: it is returned as-is and is also the only
        # source of the file extension below, so a missing title must fail
        # with a regular extraction error instead of propagating None.
        title = self._html_search_regex((
            r'<h1>(.+)<strong>',
            r'var name = "([^"]+)";'),
            webpage, 'title')

        # The thumbnail is optional metadata; do not abort when it is absent.
        thumbnail = self._html_search_regex(
            r'<img\s+src="([^"]*)".+?name="bg"',
            webpage, 'thumbnail', fatal=False)

        formats = [{
            'format_id': 'sd',
            'url': video_url,
            # The title is the uploaded file name, so it carries the
            # extension (e.g. "big_buck_bunny_720p_surround.avi").
            'ext': determine_ext(title),
        }]

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }