]>
Commit | Line | Data |
---|---|---|
1 | from __future__ import unicode_literals | |
2 | ||
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
7 | parse_duration, | |
8 | parse_iso8601, | |
9 | sanitized_Request, | |
10 | str_to_int, | |
11 | ) | |
12 | ||
13 | ||
class FourTubeIE(InfoExtractor):
    """Extractor for video pages matching ``4tube.com/videos/<id>``.

    Metadata is scraped from the video page markup; the actual media URLs
    are obtained by POSTing to a token endpoint with the media id and the
    list of available qualities.
    """

    IE_NAME = '4tube'
    _VALID_URL = r'https?://(?:www\.)?4tube\.com/videos/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black',
        'md5': '6516c8ac63b03de06bc8eac14362db4f',
        'info_dict': {
            'id': '209733',
            'ext': 'mp4',
            'title': 'Hot Babe Holly Michaels gets her ass stuffed by black',
            'uploader': 'WCP Club',
            'uploader_id': 'wcp-club',
            'upload_date': '20131031',
            'timestamp': 1383263892,
            'duration': 583,
            'view_count': int,
            'like_count': int,
            'categories': list,
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Download the video page at *url* and build its info dict.

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``categories``, ``thumbnail``, ``uploader``, ``uploader_id``,
        ``timestamp``, ``like_count``, ``view_count``, ``duration`` and
        ``age_limit``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Basic metadata taken from <meta> tags of the page.
        title = self._html_search_meta('name', webpage)
        timestamp = parse_iso8601(self._html_search_meta(
            'uploadDate', webpage))
        thumbnail = self._html_search_meta('thumbnailUrl', webpage)

        # Both uploader fields come from the same avatar link: the id is the
        # last path component of the channel URL, the display name is taken
        # from the link title.
        uploader_id = self._html_search_regex(
            r'<a class="img-avatar" href="[^"]+/channels/([^/"]+)" title="Go to [^"]+ page">',
            webpage, 'uploader id', fatal=False)
        uploader = self._html_search_regex(
            r'<a class="img-avatar" href="[^"]+/channels/[^/"]+" title="Go to ([^"]+) page">',
            webpage, 'uploader', fatal=False)

        # Categories are optional: isolate the <ul> following the
        # "Categories / Tags" heading, then collect each link text.
        categories_html = self._search_regex(
            r'(?s)><i class="icon icon-tag"></i>\s*Categories / Tags\s*.*?<ul class="list">(.*?)</ul>',
            webpage, 'categories', fatal=False)
        categories = None
        if categories_html:
            categories = [
                c.strip() for c in re.findall(
                    r'(?s)<li><a.*?>(.*?)</a>', categories_html)]

        view_count = str_to_int(self._search_regex(
            r'<meta itemprop="interactionCount" content="UserPlays:([0-9,]+)">',
            webpage, 'view count', fatal=False))
        like_count = str_to_int(self._search_regex(
            r'<meta itemprop="interactionCount" content="UserLikes:([0-9,]+)">',
            webpage, 'like count', fatal=False))
        duration = parse_duration(self._html_search_meta('duration', webpage))

        # Preferred source of the media id and quality list: the quality
        # buttons embedded in the page itself.
        media_id = self._search_regex(
            r'<button[^>]+data-id=(["\'])(?P<id>\d+)\1[^>]+data-quality=', webpage,
            'media id', default=None, group='id')
        sources = [
            quality
            for _, quality in re.findall(r'<button[^>]+data-quality=(["\'])(.+?)\1', webpage)]

        if not (media_id and sources):
            # Fallback: read the same values from the call arguments at the
            # end of the player script. `[^>]+` (rather than a single
            # `[^>]`) lets the <script> tag carry other attributes before
            # `id`, consistent with the <button> patterns above.
            player_js = self._download_webpage(
                self._search_regex(
                    r'<script[^>]+id=(["\'])playerembed\1[^>]+src=(["\'])(?P<url>.+?)\2',
                    webpage, 'player JS', group='url'),
                video_id, 'Downloading player JS')
            params_js = self._search_regex(
                r'\$\.ajax\(url,\ opts\);\s*\}\s*\}\)\(([0-9,\[\] ]+)\)',
                player_js, 'initialization parameters')
            # The captured text is a bare argument list; wrap it in brackets
            # so it parses as a JSON array.
            params = self._parse_json('[%s]' % params_js, video_id)
            media_id = params[0]
            sources = ['%s' % p for p in params[2]]

        # One POST returns the tokens for every requested quality at once.
        token_url = 'http://tkn.4tube.com/{0}/desktop/{1}'.format(
            media_id, '+'.join(sources))
        headers = {
            b'Content-Type': b'application/x-www-form-urlencoded',
            b'Origin': b'http://www.4tube.com',
        }
        token_req = sanitized_Request(token_url, b'{}', headers)
        tokens = self._download_json(token_req, video_id)

        # The loop variable is named `quality` so the builtin `format` is
        # not shadowed (and, on Python 2, not leaked into this scope).
        formats = [{
            'url': tokens[quality]['token'],
            'format_id': quality + 'p',
            'resolution': quality + 'p',
            'quality': int(quality),
        } for quality in sources]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'categories': categories,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'timestamp': timestamp,
            'like_count': like_count,
            'view_count': view_count,
            'duration': duration,
            'age_limit': 18,
        }