]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/youjizz.py
[discoverygo] Fix JSON data parsing
[yt-dlp.git] / youtube_dl / extractor / youjizz.py
CommitLineData
0b76600d
JMF
1from __future__ import unicode_literals
2
c3c77cec 3from .common import InfoExtractor
c3c77cec
PH
4
5
class YouJizzIE(InfoExtractor):
    """Information extractor for individual video pages on youjizz.com."""

    # Matches ".../videos/<optional slug>-<numeric id>.html", optionally
    # followed by a query string or fragment; the numeric part is the video id.
    _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]+)?-(?P<id>[0-9]+)\.html(?:$|[?#])'
    _TESTS = [{
        'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
        'md5': '78fc1901148284c69af12640e01c6310',
        'info_dict': {
            'id': '2189178',
            'ext': 'mp4',
            'title': 'Zeichentrick 1',
            'age_limit': 18,
        }
    }, {
        # URL without a slug in front of the id.
        'url': 'http://www.youjizz.com/videos/-2189178.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video page at ``url``.

        Downloads the page, reads the title from its ``<title>`` element and
        takes the first entry found by ``_parse_html5_media_entries`` as the
        base of the result, then sets ``id``, ``title`` and ``age_limit`` on it.

        Raises ExtractorError when the page yields no HTML5 media entries.
        """
        # Imported locally so this block does not depend on the module-level
        # import list.
        from ..utils import ExtractorError

        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # YouJizz's HTML5 player has invalid HTML: an attribute value is
        # directly followed by `controls` with no separating space.
        webpage = webpage.replace('"controls', '" controls')
        age_limit = self._rta_search(webpage)
        video_title = self._html_search_regex(
            r'<title>\s*(.*)\s*</title>', webpage, 'title')

        entries = self._parse_html5_media_entries(url, webpage, video_id)
        if not entries:
            # Indexing an empty result would surface as a bare IndexError;
            # report the extraction failure explicitly instead.
            raise ExtractorError(
                'Unable to find any HTML5 media entries', video_id=video_id)
        info_dict = entries[0]

        info_dict.update({
            'id': video_id,
            'title': video_title,
            'age_limit': age_limit,
        })

        return info_dict