]>
Commit | Line | Data |
---|---|---|
0b76600d JMF |
1 | from __future__ import unicode_literals |
2 | ||
c3c77cec | 3 | from .common import InfoExtractor |
c3c77cec PH |
4 | |
5 | ||
class YouJizzIE(InfoExtractor):
    """Information extractor for video pages on youjizz.com.

    Matches URLs of the form ``/videos/<optional-slug>-<numeric id>.html``
    on youjizz.com or any of its subdomains; the numeric part is used as
    the video id.
    """

    _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]+)?-(?P<id>[0-9]+)\.html(?:$|[?#])'
    _TESTS = [{
        'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
        'md5': '78fc1901148284c69af12640e01c6310',
        'info_dict': {
            'id': '2189178',
            'ext': 'mp4',
            'title': 'Zeichentrick 1',
            'age_limit': 18,
        }
    }, {
        # Same video id without a slug in front of the dash.
        'url': 'http://www.youjizz.com/videos/-2189178.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the video page at *url* and return its info dict.

        The returned dict is the first entry produced by
        ``_parse_html5_media_entries`` for the page, with ``id``,
        ``title`` and ``age_limit`` set on top of it.
        """
        vid = self._match_id(url)
        page = self._download_webpage(url, vid)

        # YouJizz's HTML5 player has invalid HTML: the ``controls``
        # attribute directly follows a closing quote. Insert the missing
        # space before the page is handed to the HTML5 media parser.
        page = page.replace('"controls', '" controls')

        # NOTE(review): presumably derived from an RTA rating label in the
        # page; the helper is defined outside this file -- confirm.
        rating = self._rta_search(page)
        title = self._html_search_regex(
            r'<title>\s*(.*)\s*</title>', page, 'title')

        # Only the first media entry found on the page is used.
        media_entries = self._parse_html5_media_entries(url, page, vid)
        result = media_entries[0]

        result['id'] = vid
        result['title'] = title
        result['age_limit'] = rating
        return result