]>
Commit | Line | Data |
---|---|---|
c5ba203e AS |
1 | from __future__ import unicode_literals |
2 | ||
34dd81c0 | 3 | import itertools |
dcc2a706 | 4 | import re |
5 | ||
6 | from .common import InfoExtractor | |
5c2266df | 7 | from ..compat import compat_urllib_parse_unquote |
1cc79574 | 8 | from ..utils import ( |
34dd81c0 | 9 | int_or_none, |
f4db0917 | 10 | orderedSet, |
5c2266df | 11 | sanitized_Request, |
607dbbad | 12 | str_to_int, |
dcc2a706 | 13 | ) |
14 | ||
607dbbad | 15 | |
class XTubeIE(InfoExtractor):
    """Extractor for a single XTube video.

    Accepts the old ``watch.php?v=<id>`` URLs, the new
    ``video-watch/<slug>-<id>`` URLs and the internal ``xtube:<id>`` form.
    """

    _VALID_URL = r'(?:xtube:|https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?P<display_id>[^/]+)-))(?P<id>[^/?&#]+)'

    _TESTS = [{
        # old URL schema
        'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_',
        'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab',
        'info_dict': {
            'id': 'kVTUy_G222_',
            'ext': 'mp4',
            'title': 'strange erotica',
            'description': 'contains:an ET kind of thing',
            'uploader': 'greenshowers',
            'duration': 450,
            'age_limit': 18,
        }
    }, {
        # new URL schema
        'url': 'http://www.xtube.com/video-watch/strange-erotica-625837',
        'only_matching': True,
    }, {
        'url': 'xtube:625837',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the watch page for *url* and build the video info dict.

        The video URL is required and the title falls back to the page's
        ``<h1>``; uploader, description, view count and comment count are
        looked up non-fatally.
        """
        match = re.match(self._VALID_URL, url)
        video_id, display_id = match.group('id', 'display_id')

        if not display_id:
            # No slug in the URL (old schema or ``xtube:<id>``): use the id
            # for display and fetch the watch.php page for that id.
            display_id = video_id
            url = 'http://www.xtube.com/watch.php?v=%s' % video_id

        # NOTE(review): the cookie presumably skips the age/cookie
        # interstitials - confirm against the live site.
        request = sanitized_Request(
            url, headers={'Cookie': 'age_verified=1; cookiesAccepted=1'})
        webpage = self._download_webpage(request, display_id)

        # The player configuration is embedded in the page as a JSON object.
        player_ops = self._search_regex(
            r'xt\.playerOps\s*=\s*({.+?});', webpage, 'player ops')
        flashvars = self._parse_json(player_ops, video_id)['flashvars']

        title = flashvars.get('title')
        if not title:
            # Fall back to the page heading when flashvars has no title.
            title = self._search_regex(
                r'<h1>([^<]+)</h1>', webpage, 'title')

        video_url = compat_urllib_parse_unquote(flashvars['video_url'])
        duration = int_or_none(flashvars.get('video_duration'))

        # Optional metadata scraped from the HTML; a miss is not fatal.
        uploader = self._search_regex(
            r'<input[^>]+name="contentOwnerId"[^>]+value="([^"]+)"',
            webpage, 'uploader', fatal=False)
        description = self._search_regex(
            r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False)

        raw_views = self._search_regex(
            r'<dt>Views:</dt>\s*<dd>([\d,\.]+)</dd>',
            webpage, 'view count', fatal=False)
        view_count = str_to_int(raw_views)

        raw_comments = self._html_search_regex(
            r'>Comments? \(([\d,\.]+)\)<',
            webpage, 'comment count', fatal=False)
        comment_count = str_to_int(raw_comments)

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'description': description,
            'uploader': uploader,
            'duration': duration,
            'view_count': view_count,
            'comment_count': comment_count,
            'age_limit': 18,
        }
88 | ||
22a6f150 | 89 | |
class XTubeUserIE(InfoExtractor):
    """Extractor that turns an XTube user profile into a playlist of videos."""

    IE_DESC = 'XTube user profile'
    _VALID_URL = r'https?://(?:www\.)?xtube\.com/profile/(?P<id>[^/]+-\d+)'
    _TEST = {
        'url': 'http://www.xtube.com/profile/greenshowers-4056496',
        'info_dict': {
            'id': 'greenshowers-4056496',
            'age_limit': 18,
        },
        'playlist_mincount': 155,
    }

    def _real_extract(self, url):
        """Walk the profile's paginated video listing and return a playlist.

        Pages are requested starting from 1 until a page has no ``html``
        payload, reports no usable ``pageCount``, or is the last page.
        """
        user_id = self._match_id(url)

        page_url_tmpl = 'http://www.xtube.com/profile/%s/videos/%d'
        # The same headers are sent with every page request.
        ajax_headers = {
            'Cookie': 'popunder=4',
            'X-Requested-With': 'XMLHttpRequest',
            'Referer': url,
        }

        entries = []
        for pagenum in itertools.count(1):
            page = self._download_json(
                sanitized_Request(
                    page_url_tmpl % (user_id, pagenum),
                    headers=ajax_headers),
                user_id, 'Downloading videos JSON page %d' % pagenum)

            html = page.get('html')
            if not html:
                break

            # Group 1 is the quote character, group 2 the video id; ids are
            # de-duplicated while keeping their first-seen order.
            found_ids = [
                m.group(2)
                for m in re.finditer(r'data-plid=(["\'])(.+?)\1', html)]
            entries.extend(
                self.url_result('xtube:%s' % video_id, XTubeIE.ie_key())
                for video_id in orderedSet(found_ids))

            page_count = int_or_none(page.get('pageCount'))
            if not page_count or pagenum == page_count:
                break

        result = self.playlist_result(entries, user_id)
        result['age_limit'] = 18
        return result