]>
Commit | Line | Data |
---|---|---|
2aebbcce | 1 | from __future__ import unicode_literals |
2 | ||
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | ||
7 | ||
class BeegIE(InfoExtractor):
    """Extractor for single-video pages on beeg.com."""

    # The numeric path component of the URL is used as the video id.
    _VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
    _TEST = {
        'url': 'http://beeg.com/5416503',
        'md5': '634526ae978711f6b748fe0dd6c11f57',
        'info_dict': {
            'id': '5416503',
            'ext': 'mp4',
            'title': 'Sultry Striptease',
            'description': 'md5:6db3c6177972822aaba18652ff59c773',
        }
    }

    def _real_extract(self, url):
        """Download the video page for ``url`` and scrape its metadata.

        Returns a dict with the keys ``id``, ``url``, ``title``,
        ``description``, ``thumbnail`` and ``categories``.  The video URL
        and the title are looked up with the default (fatal) behaviour of
        ``_html_search_regex``; the remaining fields are requested with
        ``fatal=False`` and are ``None`` when the page does not provide them.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        # Only the '480p' entry of the page's inline data is used.
        video_url = self._html_search_regex(
            r"'480p'\s*:\s*'([^']+)'", webpage, 'video URL')

        title = self._html_search_regex(
            r'<title>([^<]+)\s*-\s*beeg\.?</title>', webpage, 'title')

        description = self._html_search_regex(
            r'<meta name="description" content="([^"]*)"',
            webpage, 'description', fatal=False)

        thumbnail = self._html_search_regex(
            r'\'previewer.url\'\s*:\s*"([^"]*)"',
            webpage, 'thumbnail', fatal=False)

        categories_str = self._html_search_regex(
            r'<meta name="keywords" content="([^"]+)"',
            webpage, 'categories', fatal=False)
        # The lookup above is non-fatal, so it may yield None when the
        # keywords tag is missing; splitting unconditionally would raise
        # AttributeError and abort an otherwise successful extraction.
        categories = (
            None if categories_str is None else categories_str.split(','))

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'categories': categories,
        }