]>
Commit | Line | Data |
---|---|---|
66aa382e NJ |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
1cc79574 PH |
4 | import re |
5 | ||
6 | from ..compat import ( | |
66aa382e NJ |
7 | compat_urllib_parse, |
8 | compat_urllib_request, | |
1cc79574 PH |
9 | ) |
10 | from ..utils import ( | |
798a2cad | 11 | determine_ext, |
1cc79574 | 12 | ExtractorError, |
66aa382e | 13 | ) |
66aa382e NJ |
14 | |
15 | from .common import InfoExtractor | |
16 | ||
17 | ||
class SockshareIE(InfoExtractor):
    """Extractor for file pages on sockshare.com (see ``_VALID_URL``)."""

    _VALID_URL = r'https?://(?:www\.)?sockshare\.com/file/(?P<id>[0-9A-Za-z]+)'
    # Marker searched for in the file page to detect a removed/missing file.
    _FILE_DELETED_REGEX = r'This file doesn\'t exist, or has been removed\.</div>'
    _TEST = {
        'url': 'http://www.sockshare.com/file/437BE28B89D799D7',
        'md5': '9d0bf1cfb6dbeaa8d562f6c97506c5bd',
        'info_dict': {
            'id': '437BE28B89D799D7',
            'title': 'big_buck_bunny_720p_surround.avi',
            'ext': 'avi',
        }
    }

    def _real_extract(self, url):
        """Resolve a sockshare file page into an info dict.

        The file page is fetched once to read the hidden ``hash`` form
        field, then the "Continue as Free User" confirmation form is
        POSTed back to the same URL; the download link, title and
        thumbnail are scraped from that second response.

        Returns a dict with the keys ``id``, ``title``, ``thumbnail`` and
        ``formats`` (a single ``sd`` entry).

        Raises ExtractorError (``expected=True``) when the page matches
        ``_FILE_DELETED_REGEX``.
        """
        video_id = self._match_id(url)
        # Rebuild the URL from the id so both requests hit the same address
        # regardless of how the input URL was written.
        url = 'http://sockshare.com/file/%s' % video_id
        webpage = self._download_webpage(url, video_id)

        if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
            raise ExtractorError('Video %s does not exist' % video_id,
                                 expected=True)

        confirm_hash = self._html_search_regex(r'''(?x)<input\s+
            type="hidden"\s+
            value="([^"]*)"\s+
            name="hash"
            ''', webpage, 'hash')

        fields = {
            "hash": confirm_hash.encode('utf-8'),
            "confirm": "Continue as Free User"
        }

        # urlencode() produces text, but a request body must be bytes on
        # Python 3. The percent-encoded form is pure ASCII, so encoding it
        # is lossless (and a no-op in effect on Python 2).
        post = compat_urllib_parse.urlencode(fields).encode('ascii')
        req = compat_urllib_request.Request(url, post)
        # Apparently, this header is required for confirmation to work.
        req.add_header('Host', 'www.sockshare.com')
        req.add_header('Content-type', 'application/x-www-form-urlencoded')

        webpage = self._download_webpage(
            req, video_id, 'Downloading video page')

        video_url = self._html_search_regex(
            r'<a href="([^"]*)".+class="download_file_link"',
            webpage, 'file url')
        # The scraped href is prefixed with the site origin to form the
        # final download URL.
        video_url = "http://www.sockshare.com" + video_url
        # Two alternative title patterns are tried; no match yields None.
        title = self._html_search_regex((
            r'<h1>(.+)<strong>',
            r'var name = "([^"]+)";'),
            webpage, 'title', default=None)
        thumbnail = self._html_search_regex(
            r'<img\s+src="([^"]*)".+?name="bg"',
            webpage, 'thumbnail', default=None)

        formats = [{
            'format_id': 'sd',
            'url': video_url,
            # The extension is derived from the title (e.g. "….avi" in
            # _TEST), not from the download URL.
            # NOTE(review): title may be None here (default=None above) —
            # confirm determine_ext() accepts None.
            'ext': determine_ext(title),
        }]

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }