]>
Commit | Line | Data |
---|---|---|
c5ba203e AS |
1 | from __future__ import unicode_literals |
2 | ||
34dd81c0 | 3 | import itertools |
dcc2a706 | 4 | import re |
5 | ||
6 | from .common import InfoExtractor | |
1cc79574 | 7 | from ..utils import ( |
34dd81c0 | 8 | int_or_none, |
24eb7c25 | 9 | js_to_json, |
f4db0917 | 10 | orderedSet, |
1b734adb | 11 | parse_duration, |
5c2266df | 12 | sanitized_Request, |
607dbbad | 13 | str_to_int, |
dcc2a706 | 14 | ) |
15 | ||
607dbbad | 16 | |
class XTubeIE(InfoExtractor):
    """Information extractor for a single XTube video page.

    Matches three URL shapes (see ``_VALID_URL``): the ``watch.php?...v=<id>``
    form, the ``video-watch/[embedded/]<display_id>-<id>`` form and the
    internal ``xtube:<id>`` scheme.
    """

    _VALID_URL = r'''(?x)
                        (?:
                            xtube:|
                            https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?:embedded/)?(?P<display_id>[^/]+)-)
                        )
                        (?P<id>[^/?&#]+)
                    '''

    _TESTS = [{
        # old URL schema
        'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_',
        'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab',
        'info_dict': {
            'id': 'kVTUy_G222_',
            'ext': 'mp4',
            'title': 'strange erotica',
            'description': 'contains:an ET kind of thing',
            'uploader': 'greenshowers',
            'duration': 450,
            'view_count': int,
            'comment_count': int,
            'age_limit': 18,
        }
    }, {
        # FLV videos with duplicated formats
        'url': 'http://www.xtube.com/video-watch/A-Super-Run-Part-1-YT-9299752',
        'md5': 'a406963eb349dd43692ec54631efd88b',
        'info_dict': {
            'id': '9299752',
            'display_id': 'A-Super-Run-Part-1-YT',
            'ext': 'flv',
            'title': 'A Super Run - Part 1 (YT)',
            'description': 'md5:ca0d47afff4a9b2942e4b41aa970fd93',
            'uploader': 'tshirtguy59',
            'duration': 579,
            'view_count': int,
            'comment_count': int,
            'age_limit': 18,
        },
    }, {
        # new URL schema
        'url': 'http://www.xtube.com/video-watch/strange-erotica-625837',
        'only_matching': True,
    }, {
        'url': 'xtube:625837',
        'only_matching': True,
    }, {
        'url': 'xtube:kVTUy_G222_',
        'only_matching': True,
    }, {
        'url': 'https://www.xtube.com/video-watch/embedded/milf-tara-and-teen-shared-and-cum-covered-extreme-bukkake-32203482?embedsize=big',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the video page for *url* and build its info dict.

        Returns a dict with ``id``, ``display_id``, ``title``,
        ``description``, ``uploader``, ``duration``, ``view_count``,
        ``comment_count``, ``age_limit`` and ``formats``.  Only the
        sources and the title are looked up without ``fatal=False``; the
        other scraped fields are requested with ``fatal=False``.
        """
        match = re.match(self._VALID_URL, url)
        video_id = match.group('id')
        # URLs without a slug (watch.php / xtube:) have no display_id group
        # value, so the video id doubles as the display id.
        display_id = match.group('display_id') or video_id

        # Short all-digit ids are requested through the video-watch path
        # (with an empty slug); everything else goes through watch.php.
        is_short_numeric_id = video_id.isdigit() and len(video_id) < 11
        if is_short_numeric_id:
            page_url = 'http://www.xtube.com/video-watch/-%s' % video_id
        else:
            page_url = 'http://www.xtube.com/watch.php?v=%s' % video_id

        # NOTE(review): the cookies presumably skip the age/cookie consent
        # interstitials -- inferred from their names only.
        webpage = self._download_webpage(
            page_url, display_id,
            headers={'Cookie': 'age_verified=1; cookiesAccepted=1'})

        # The page embeds a JS object literal ``sources: {...},`` mapping a
        # format id to its media URL; js_to_json makes it parseable.
        sources_literal = self._search_regex(
            r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),',
            webpage, 'sources', group='sources')
        sources = self._parse_json(
            sources_literal, video_id, transform_source=js_to_json)

        formats = [{
            'url': format_url,
            'format_id': format_id,
            # The format id is reused as the height when it is integer-like.
            'height': int_or_none(format_id),
        } for format_id, format_url in sources.items()]
        self._remove_duplicate_formats(formats)
        self._sort_formats(formats)

        title_patterns = (
            r'<h1>\s*(?P<title>[^<]+?)\s*</h1>',
            r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1',
        )
        title = self._search_regex(
            title_patterns, webpage, 'title', group='title')

        description = self._search_regex(
            r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False)

        uploader_patterns = (
            r'<input[^>]+name="contentOwnerId"[^>]+value="([^"]+)"',
            r'<span[^>]+class="nickname"[^>]*>([^<]+)',
        )
        uploader = self._search_regex(
            uploader_patterns, webpage, 'uploader', fatal=False)

        duration_text = self._search_regex(
            r'<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>',
            webpage, 'duration', fatal=False)
        duration = parse_duration(duration_text)

        view_count_text = self._search_regex(
            r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>',
            webpage, 'view count', fatal=False)
        view_count = str_to_int(view_count_text)

        comment_count_text = self._html_search_regex(
            r'>Comments? \(([\d,\.]+)\)<',
            webpage, 'comment count', fatal=False)
        comment_count = str_to_int(comment_count_text)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'uploader': uploader,
            'duration': duration,
            'view_count': view_count,
            'comment_count': comment_count,
            'age_limit': 18,
            'formats': formats,
        }
136 | ||
22a6f150 | 137 | |
class XTubeUserIE(InfoExtractor):
    """Information extractor for an XTube user profile.

    Walks the profile's paginated videos endpoint and returns a playlist
    whose entries point at ``xtube:<id>`` URLs handled by ``XTubeIE``.
    """

    IE_DESC = 'XTube user profile'
    _VALID_URL = r'https?://(?:www\.)?xtube\.com/profile/(?P<id>[^/]+-\d+)'
    _TEST = {
        'url': 'http://www.xtube.com/profile/greenshowers-4056496',
        'info_dict': {
            'id': 'greenshowers-4056496',
            'age_limit': 18,
        },
        'playlist_mincount': 155,
    }

    def _real_extract(self, url):
        """Collect all videos listed on the profile at *url*.

        Pages are requested one by one, starting at 1, until a page has no
        ``html`` payload, reports no ``pageCount``, or is the last page.
        Returns a playlist result keyed by the user id with ``age_limit``
        set to 18.
        """
        user_id = self._match_id(url)

        entries = []
        for pagenum in itertools.count(1):
            page_url = 'http://www.xtube.com/profile/%s/videos/%d' % (
                user_id, pagenum)
            # NOTE(review): the headers presumably mimic the site's own
            # AJAX pagination request -- inferred from the header names.
            request = sanitized_Request(page_url, headers={
                'Cookie': 'popunder=4',
                'X-Requested-With': 'XMLHttpRequest',
                'Referer': url,
            })

            page = self._download_json(
                request, user_id,
                'Downloading videos JSON page %d' % pagenum)

            html = page.get('html')
            if not html:
                break

            # The second group holds the attribute value; the first only
            # captures the quote character for the backreference.
            found_ids = [
                id_match.group(2)
                for id_match in re.finditer(r'data-plid=(["\'])(.+?)\1', html)
            ]
            entries.extend(
                self.url_result('xtube:%s' % video_id, XTubeIE.ie_key())
                for video_id in orderedSet(found_ids))

            page_count = int_or_none(page.get('pageCount'))
            more_pages_expected = page_count and pagenum != page_count
            if not more_pages_expected:
                break

        playlist = self.playlist_result(entries, user_id)
        playlist['age_limit'] = 18
        return playlist