]>
Commit | Line | Data |
---|---|---|
3c9c088f | 1 | from .common import InfoExtractor |
3c9c088f | 2 | from ..compat import compat_urlparse |
f484c5fa S |
3 | from ..utils import ( |
4 | int_or_none, | |
5 | js_to_json, | |
6 | remove_end, | |
7 | unified_strdate, | |
8 | ) | |
3c9c088f T |
9 | |
10 | ||
class VidbitIE(InfoExtractor):
    """Extractor for vidbit.co videos addressed by ``/watch?v=`` or ``/embed?v=`` URLs."""

    # The video id is the alphanumeric value of the ``v`` query parameter.
    _VALID_URL = r'https?://(?:www\.)?vidbit\.co/(?:watch|embed)\?.*?\bv=(?P<id>[\da-zA-Z]+)'
    _TESTS = [{
        'url': 'http://www.vidbit.co/watch?v=jkL2yDOEq2',
        'md5': '1a34b7f14defe3b8fafca9796892924d',
        'info_dict': {
            'id': 'jkL2yDOEq2',
            'ext': 'mp4',
            'title': 'Intro to VidBit',
            'description': 'md5:5e0d6142eec00b766cbf114bfd3d16b7',
            'thumbnail': r're:https?://.*\.jpg$',
            'upload_date': '20160618',
            'view_count': int,
            'comment_count': int,
        }
    }, {
        'url': 'http://www.vidbit.co/embed?v=jkL2yDOEq2&auto=0&water=0',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        The ``/watch`` page for the video id is downloaded regardless of
        whether *url* was a watch or an embed link.  The video URL and title
        are taken from the player ``.setup({...})`` object when it is present
        and parseable; otherwise they are recovered from the page markup.

        Returns a dict with the keys ``id``, ``url``, ``title``,
        ``description``, ``thumbnail``, ``upload_date``, ``view_count`` and
        ``comment_count``.
        """
        video_id = self._match_id(url)

        # Always request the /watch page for this id, whichever URL form
        # was given.
        webpage = self._download_webpage(
            compat_urlparse.urljoin(url, '/watch?v=%s' % video_id), video_id)

        video_url = title = None

        # The setup object is only the preferred source: both values have
        # fallbacks below, so a malformed object must not abort extraction.
        # With fatal=False a parse failure yields a falsy result and the
        # fallbacks take over.
        config = self._parse_json(
            self._search_regex(
                r'(?s)\.setup\(({.+?})\);', webpage, 'setup', default='{}'),
            video_id, transform_source=js_to_json, fatal=False)
        if config:
            if config.get('file'):
                # 'file' may be relative; resolve it against the input URL.
                video_url = compat_urlparse.urljoin(url, config['file'])
            title = config.get('title')

        if not video_url:
            # Fallback: pick the quoted value of a `file: '...'` entry
            # straight out of the page source.
            video_url = compat_urlparse.urljoin(url, self._search_regex(
                r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
                webpage, 'video URL', group='url'))

        if not title:
            # Fallback chain: <h1>, then <title>, then the Open Graph title;
            # the ' - VidBit' suffix is stripped from whichever one matched.
            title = remove_end(
                self._html_search_regex(
                    (r'<h1>(.+?)</h1>', r'<title>(.+?)</title>'),
                    webpage, 'title', default=None) or self._og_search_title(webpage),
                ' - VidBit')

        description = self._html_search_meta(
            ('description', 'og:description', 'twitter:description'),
            webpage, 'description')

        upload_date = unified_strdate(self._html_search_meta(
            'datePublished', webpage, 'upload date'))

        # Counters are optional metadata: the lookups are non-fatal and the
        # results are passed through int_or_none.
        view_count = int_or_none(self._search_regex(
            r'<strong>(\d+)</strong> views',
            webpage, 'view count', fatal=False))
        comment_count = int_or_none(self._search_regex(
            r'id=["\']cmt_num["\'][^>]*>\((\d+)\)',
            webpage, 'comment count', fatal=False))

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': self._og_search_thumbnail(webpage),
            'upload_date': upload_date,
            'view_count': view_count,
            'comment_count': comment_count,
        }