]>
Commit | Line | Data |
---|---|---|
34dd81c0 | 1 | import itertools |
dcc2a706 | 2 | import re |
3 | ||
4 | from .common import InfoExtractor | |
1cc79574 | 5 | from ..utils import ( |
34dd81c0 | 6 | int_or_none, |
24eb7c25 | 7 | js_to_json, |
f4db0917 | 8 | orderedSet, |
1b734adb | 9 | parse_duration, |
5c2266df | 10 | sanitized_Request, |
607dbbad | 11 | str_to_int, |
41d1cca3 | 12 | url_or_none, |
dcc2a706 | 13 | ) |
14 | ||
607dbbad | 15 | |
class XTubeIE(InfoExtractor):
    """Extractor for a single XTube video.

    Matches the old ``watch.php?v=<id>`` URLs, the newer
    ``video-watch/[embedded/]<slug>-<id>`` URLs and the ``xtube:<id>``
    shorthand.
    """

    _VALID_URL = r'''(?x)
                        (?:
                            xtube:|
                            https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?:embedded/)?(?P<display_id>[^/]+)-)
                        )
                        (?P<id>[^/?&#]+)
                    '''

    _TESTS = [{
        # old URL schema
        'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_',
        'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab',
        'info_dict': {
            'id': 'kVTUy_G222_',
            'ext': 'mp4',
            'title': 'strange erotica',
            'description': 'contains:an ET kind of thing',
            'uploader': 'greenshowers',
            'duration': 450,
            'view_count': int,
            'comment_count': int,
            'age_limit': 18,
        }
    }, {
        # new URL schema
        'url': 'http://www.xtube.com/video-watch/strange-erotica-625837',
        'only_matching': True,
    }, {
        'url': 'xtube:625837',
        'only_matching': True,
    }, {
        'url': 'xtube:kVTUy_G222_',
        'only_matching': True,
    }, {
        'url': 'https://www.xtube.com/video-watch/embedded/milf-tara-and-teen-shared-and-cum-covered-extreme-bukkake-32203482?embedsize=big',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the watch page for *url* and return the video info dict.

        Format URLs are taken from the ``mainRoll`` entry of the page's
        ``playerConf`` object (``sources``/``format`` mapping and/or the
        ``mediaDefinition`` list); if neither is usable, a ``sources`` object
        is searched for directly in the page. Title and duration fall back to
        values scraped from the HTML when the player config lacks them.
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        # Only the new-style URLs carry a slug; otherwise reuse the id.
        display_id = mobj.group('display_id') or video_id

        # Short all-digit ids are fetched through the new URL schema,
        # everything else through the old watch.php one.
        if video_id.isdigit() and len(video_id) < 11:
            url_pattern = 'http://www.xtube.com/video-watch/-%s'
        else:
            url_pattern = 'http://www.xtube.com/watch.php?v=%s'

        webpage = self._download_webpage(
            url_pattern % video_id, display_id, headers={
                'Cookie': 'age_verified=1; cookiesAccepted=1',
            })

        title, thumbnail, duration, sources, media_definition = [None] * 5

        config = self._parse_json(self._search_regex(
            r'playerConf\s*=\s*({.+?})\s*,\s*(?:\n|loaderConf|playerWrapper)', webpage, 'config',
            default='{}'), video_id, transform_source=js_to_json, fatal=False)
        # The parsed value is only trusted when it is a dict; any other JSON
        # type (or a parse failure, which yields a falsy value) is skipped
        # instead of raising on ``.get``.
        main_roll = config.get('mainRoll') if isinstance(config, dict) else None
        if isinstance(main_roll, dict):
            title = main_roll.get('title')
            thumbnail = main_roll.get('poster')
            duration = int_or_none(main_roll.get('duration'))
            sources = main_roll.get('sources') or main_roll.get('format')
            media_definition = main_roll.get('mediaDefinition')

        # Last resort: look for a bare ``sources`` object anywhere in the
        # page. This search is fatal when nothing matches.
        if not isinstance(sources, dict) and not media_definition:
            sources = self._parse_json(self._search_regex(
                r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),',
                webpage, 'sources', group='sources'), video_id,
                transform_source=js_to_json)

        formats = []
        # URLs already turned into a format, shared by both loops below so the
        # same URL is not added twice.
        format_urls = set()

        if isinstance(sources, dict):
            for format_id, format_url in sources.items():
                format_url = url_or_none(format_url)
                if not format_url or format_url in format_urls:
                    continue
                format_urls.add(format_url)
                formats.append({
                    'url': format_url,
                    'format_id': format_id,
                    'height': int_or_none(format_id),
                })

        if isinstance(media_definition, list):
            for media in media_definition:
                # Skip malformed entries rather than failing the extraction.
                if not isinstance(media, dict):
                    continue
                video_url = url_or_none(media.get('videoUrl'))
                if not video_url or video_url in format_urls:
                    continue
                format_urls.add(video_url)
                format_id = media.get('format')
                if format_id == 'hls':
                    formats.extend(self._extract_m3u8_formats(
                        video_url, video_id, 'mp4', entry_protocol='m3u8_native',
                        m3u8_id='hls', fatal=False))
                elif format_id == 'mp4':
                    height = int_or_none(media.get('quality'))
                    formats.append({
                        'url': video_url,
                        'format_id': '%s-%d' % (format_id, height) if height else format_id,
                        'height': height,
                    })

        self._remove_duplicate_formats(formats)
        self._sort_formats(formats)

        if not title:
            title = self._search_regex(
                (r'<h1>\s*(?P<title>[^<]+?)\s*</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'),
                webpage, 'title', group='title')
        description = self._og_search_description(
            webpage, default=None) or self._html_search_meta(
            'twitter:description', webpage, default=None) or self._search_regex(
            r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False)
        uploader = self._search_regex(
            (r'<input[^>]+name="contentOwnerId"[^>]+value="([^"]+)"',
             r'<span[^>]+class="nickname"[^>]*>([^<]+)'),
            webpage, 'uploader', fatal=False)
        if not duration:
            duration = parse_duration(self._search_regex(
                r'<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>',
                webpage, 'duration', fatal=False))
        view_count = str_to_int(self._search_regex(
            (r'["\']viewsCount["\'][^>]*>(\d+)\s+views',
             r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>'),
            webpage, 'view count', fatal=False))
        comment_count = str_to_int(self._html_search_regex(
            r'>Comments? \(([\d,\.]+)\)<',
            webpage, 'comment count', fatal=False))

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'duration': duration,
            'view_count': view_count,
            'comment_count': comment_count,
            'age_limit': 18,
            'formats': formats,
        }
171 | ||
22a6f150 | 172 | |
class XTubeUserIE(InfoExtractor):
    """Extractor for an XTube user profile, returned as a playlist of videos."""

    IE_DESC = 'XTube user profile'
    _VALID_URL = r'https?://(?:www\.)?xtube\.com/profile/(?P<id>[^/]+-\d+)'
    _TEST = {
        'url': 'http://www.xtube.com/profile/greenshowers-4056496',
        'info_dict': {
            'id': 'greenshowers-4056496',
            'age_limit': 18,
        },
        'playlist_mincount': 154,
    }

    def _real_extract(self, url):
        """Page through the profile's video listing and return a playlist.

        Each page is a JSON document whose ``html`` field holds the markup of
        the listed videos and whose ``pageCount`` field gives the total number
        of pages. Every video found is delegated to ``XTubeIE`` through an
        ``xtube:<id>`` URL.
        """
        user_id = self._match_id(url)

        entries = []
        for pagenum in itertools.count(1):
            # Headers are passed straight to the download helper, the same
            # way XTubeIE does it, instead of pre-building a request object.
            page = self._download_json(
                'http://www.xtube.com/profile/%s/videos/%d' % (user_id, pagenum),
                user_id, 'Downloading videos JSON page %d' % pagenum,
                headers={
                    'Cookie': 'popunder=4',
                    # NOTE(review): presumably required for the endpoint to
                    # answer with JSON - confirm against the live site.
                    'X-Requested-With': 'XMLHttpRequest',
                    'Referer': url,
                })

            html = page.get('html')
            if not html:
                break

            # The same data-plid can appear several times in one page; keep
            # the first occurrence of each, in page order.
            video_ids = orderedSet([
                video_id for _, video_id in re.findall(
                    r'data-plid=(["\'])(.+?)\1', html)])
            entries.extend(
                self.url_result('xtube:%s' % video_id, XTubeIE.ie_key())
                for video_id in video_ids)

            page_count = int_or_none(page.get('pageCount'))
            # ``>=`` rather than ``==`` so that a pageCount which shrinks
            # between requests still terminates the loop.
            if not page_count or pagenum >= page_count:
                break

        playlist = self.playlist_result(entries, user_id)
        playlist['age_limit'] = 18
        return playlist
215 | return playlist |