]>
Commit | Line | Data |
---|---|---|
2aebbcce | 1 | from __future__ import unicode_literals |
2 | ||
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | ||
7 | ||
class BeegIE(InfoExtractor):
    """Extractor for single video pages on beeg.com.

    The numeric path component of the URL is used as the video id; all
    metadata and the list of formats are scraped from the video web page.
    """

    _VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
    _TEST = {
        'url': 'http://beeg.com/5416503',
        'md5': '1bff67111adb785c51d1b42959ec10e5',
        'info_dict': {
            'id': '5416503',
            'ext': 'mp4',
            'title': 'Sultry Striptease',
            'description': 'md5:6db3c6177972822aaba18652ff59c773',
            'categories': list,  # NSFW
            # Raw string: '\.' is a regex escape, not a valid string escape.
            'thumbnail': r're:https?://.*\.jpg$',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Download the video page for *url* and build its info dict.

        Returns a dict with the keys 'id', 'title', 'description',
        'thumbnail', 'categories', 'formats' and 'age_limit'.
        'description', 'thumbnail' and 'categories' are looked up with
        fatal=False; 'categories' is None when no keywords were found.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        # The page embeds a JS object literal mapping quality names to
        # media URLs, e.g. var qualityArr = {'480p': '...', ...}.
        quality_arr = self._search_regex(
            r'(?s)var\s+qualityArr\s*=\s*{\s*(.+?)\s*}', webpage, 'quality formats')

        formats = []
        for format_id, format_url in re.findall(
                r"'([^']+)'\s*:\s*'([^']+)'", quality_arr):
            fmt = {
                'url': format_url,
                'format_id': format_id,
            }
            # Quality keys are expected to look like '480p'. Parse the
            # number explicitly instead of blindly dropping the last
            # character, so an unexpected key neither raises ValueError
            # nor yields a truncated height.
            # NOTE(review): assumes _sort_formats accepts formats without
            # a 'height' entry - confirm against InfoExtractor.
            height_mobj = re.match(r'(\d+)p?$', format_id)
            if height_mobj:
                fmt['height'] = int(height_mobj.group(1))
            formats.append(fmt)

        self._sort_formats(formats)

        title = self._html_search_regex(
            r'<title>([^<]+)\s*-\s*beeg\.?</title>', webpage, 'title')

        description = self._html_search_regex(
            r'<meta name="description" content="([^"]*)"',
            webpage, 'description', fatal=False)
        # The dot in 'previewer.url' is escaped so that only the literal
        # key name matches.
        thumbnail = self._html_search_regex(
            r'\'previewer\.url\'\s*:\s*"([^"]*)"',
            webpage, 'thumbnail', fatal=False)

        # Categories come from the comma-separated keywords meta tag.
        categories_str = self._html_search_regex(
            r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False)
        categories = (
            None if categories_str is None
            else categories_str.split(','))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'categories': categories,
            'formats': formats,
            'age_limit': 18,
        }