]>
Commit | Line | Data |
---|---|---|
1 | import re | |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..aes import aes_decrypt_text | |
5 | from ..compat import compat_urllib_parse_unquote | |
6 | from ..utils import ( | |
7 | determine_ext, | |
8 | format_field, | |
9 | int_or_none, | |
10 | str_to_int, | |
11 | strip_or_none, | |
12 | url_or_none, | |
13 | ) | |
14 | ||
15 | ||
class Tube8IE(InfoExtractor):
    """Information extractor for tube8.com video pages."""

    # NOTE(review): presumably tells the framework this extractor is
    # currently known to be broken -- confirm against InfoExtractor.
    _WORKING = False
    _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/(?P<id>\d+)'
    _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?tube8\.com/embed/(?:[^/]+/)+\d+)']
    _TESTS = [{
        'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
        'md5': '65e20c48e6abff62ed0c3965fff13a39',
        'info_dict': {
            'id': '229795',
            'display_id': 'kasia-music-video',
            'ext': 'mp4',
            'description': 'hot teen Kasia grinding',
            'uploader': 'unknown',
            'title': 'Kasia music video',
            'age_limit': 18,
            'duration': 230,
            'categories': ['Teen'],
            'tags': ['dancing'],
        },
    }, {
        'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/',
        'only_matching': True,
    }]

    def _extract_info(self, url, fatal=True):
        """Download the video page and collect its formats and core metadata.

        The page's ``flashvars`` JSON object is the primary source for the
        title, thumbnail, duration and the per-quality media URLs; a
        ``flashvars.video_url = "..."`` assignment in the page is used as an
        additional source of a media URL.

        ``fatal`` is accepted for interface compatibility only; this
        implementation does not consult it.

        Returns a ``(webpage, info)`` tuple where ``info`` holds the keys
        ``id``, ``display_id``, ``title``, ``thumbnail``, ``duration``,
        ``age_limit`` and ``formats``.
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        # Fall back to the numeric id when the URL pattern provides no
        # (or an empty) display_id group.
        display_id = mobj.groupdict().get('display_id') or video_id

        webpage = self._download_webpage(
            url, display_id, headers={'Cookie': 'age_verified=1'})

        formats = []
        format_urls = set()  # URLs already turned into a format (dedup)

        title = None
        thumbnail = None
        duration = None
        encrypted = False

        def extract_format(format_url, height=None):
            # When the page flags its media URLs as encrypted, the value is
            # ciphertext rather than a URL, so it has to be decrypted
            # *before* URL validation (which would otherwise reject it).
            # Values that already are valid URLs are left untouched.
            if (encrypted and title and isinstance(format_url, str)
                    and not url_or_none(format_url)):
                try:
                    format_url = aes_decrypt_text(
                        format_url, title, 32).decode('utf-8')
                except ValueError:
                    # Malformed ciphertext / undecodable plaintext: skip
                    # this candidate instead of aborting the extraction.
                    return
            format_url = url_or_none(format_url)
            if not format_url or not format_url.startswith(('http', '//')):
                return
            if format_url in format_urls:
                return
            format_urls.add(format_url)
            # Bitrate and height are guessed from URL path components such
            # as ``/1500k/`` or ``_480p_``.
            tbr = int_or_none(self._search_regex(
                r'[/_](\d+)[kK][/_]', format_url, 'tbr', default=None))
            if not height:
                height = int_or_none(self._search_regex(
                    r'[/_](\d+)[pP][/_]', format_url, 'height', default=None))
            formats.append({
                'url': format_url,
                'format_id': format_field(height, None, '%dp'),
                'height': height,
                'tbr': tbr,
            })

        flashvars = self._parse_json(
            self._search_regex(
                r'flashvars\s*=\s*({.+?});', webpage,
                'flashvars', default='{}'),
            display_id, fatal=False)

        if flashvars:
            title = flashvars.get('video_title')
            thumbnail = flashvars.get('image_url')
            duration = int_or_none(flashvars.get('video_duration'))
            encrypted = flashvars.get('encrypted') is True
            for key, value in flashvars.items():
                quality_mobj = re.search(r'quality_(\d+)[pP]', key)
                if quality_mobj:
                    extract_format(value, int(quality_mobj.group(1)))
            video_url = flashvars.get('video_url')
            # Ciphertext has no file extension, so the extension check is
            # only meaningful for plain URLs.
            if video_url and (encrypted or determine_ext(video_url, None)):
                extract_format(video_url)

        video_url = self._html_search_regex(
            r'flashvars\.video_url\s*=\s*(["\'])(?P<url>http.+?)\1',
            webpage, 'video url', default=None, group='url')
        if video_url:
            extract_format(compat_urllib_parse_unquote(video_url))

        if not formats:
            if 'title="This video is no longer available"' in webpage:
                self.raise_no_formats(
                    'Video %s is no longer available' % video_id, expected=True)

        if not title:
            title = self._html_search_regex(
                r'<h1[^>]*>([^<]+)', webpage, 'title')

        return webpage, {
            'id': video_id,
            'display_id': display_id,
            'title': strip_or_none(title),
            'thumbnail': thumbnail,
            'duration': duration,
            'age_limit': 18,
            'formats': formats,
        }

    def _real_extract(self, url):
        """Extract the full info dict for a tube8.com video URL.

        Builds on ``_extract_info`` and adds the description, uploader,
        view/like/dislike/comment counts, categories and tags scraped from
        the page HTML. All of those extra fields are optional (non-fatal).
        """
        webpage, info = self._extract_info(url)

        if not info['title']:
            info['title'] = self._html_search_regex(
                r'videoTitle\s*=\s*"([^"]+)', webpage, 'title')

        description = self._html_search_regex(
            r'(?s)Description:</dt>\s*<dd>(.+?)</dd>', webpage, 'description', fatal=False)
        uploader = self._html_search_regex(
            r'<span class="username">\s*(.+?)\s*<',
            webpage, 'uploader', fatal=False)

        like_count = int_or_none(self._search_regex(
            r'rupVar\s*=\s*"(\d+)"', webpage, 'like count', fatal=False))
        dislike_count = int_or_none(self._search_regex(
            r'rdownVar\s*=\s*"(\d+)"', webpage, 'dislike count', fatal=False))
        view_count = str_to_int(self._search_regex(
            r'Views:\s*</dt>\s*<dd>([\d,\.]+)',
            webpage, 'view count', fatal=False))
        comment_count = str_to_int(self._search_regex(
            r'<span id="allCommentsCount">(\d+)</span>',
            webpage, 'comment count', fatal=False))

        category = self._search_regex(
            r'Category:\s*</dt>\s*<dd>\s*<a[^>]+href=[^>]+>([^<]+)',
            webpage, 'category', fatal=False)
        categories = [category] if category else None

        # Grab the whole tags <dd> up to the first closing tag that is not
        # an </a>, then pull the text of every link inside it.
        tags_str = self._search_regex(
            r'(?s)Tags:\s*</dt>\s*<dd>(.+?)</(?!a)',
            webpage, 'tags', fatal=False)
        tags = re.findall(
            r'<a[^>]+href=[^>]+>([^<]+)', tags_str) if tags_str else None

        info.update({
            'description': description,
            'uploader': uploader,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'comment_count': comment_count,
            'categories': categories,
            'tags': tags,
        })

        return info