]>
Commit | Line | Data |
---|---|---|
f843300f S |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
bbb3f730 | 7 | from ..utils import ( |
8 | determine_ext, | |
9 | int_or_none, | |
10 | ) | |
f843300f S |
11 | |
12 | ||
class OnionStudiosIE(InfoExtractor):
    """Extractor for videos hosted on onionstudios.com.

    Accepts both regular video page URLs (``/videos/<slug>-<id>``) and
    embed URLs (``/embed?id=<id>``); in either case the numeric id is used
    to fetch the embed page, from which formats and metadata are scraped.
    """

    _VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:videos/[^/]+-|embed\?.*\bid=)(?P<id>\d+)(?!-)'

    _TESTS = [{
        'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
        'md5': 'd4851405d31adfadf71cd7a487b765bb',
        'info_dict': {
            'id': '2937',
            'ext': 'mp4',
            'title': 'Hannibal charges forward, stops for a cocktail',
            'description': 'md5:e786add7f280b7f0fe237b64cc73df76',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'The A.V. Club',
            'uploader_id': 'TheAVClub',
        },
    }, {
        'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_url(webpage):
        """Return the first onionstudios.com embed iframe URL in *webpage*.

        Returns None when the page contains no such iframe.
        """
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/embed.+?)\1', webpage)
        if mobj:
            return mobj.group('url')

    def _real_extract(self, url):
        """Download the embed page for *url* and build the info dict.

        Formats come from the ``<source src="...">`` tags of the embed page;
        title, description, thumbnail and uploader data are scraped from
        quoted assignments in the same page. Only the title is mandatory.
        """
        video_id = self._match_id(url)

        # Always work from the embed page, whichever URL flavour was given.
        webpage = self._download_webpage(
            'http://www.onionstudios.com/embed?id=%s' % video_id, video_id)

        formats = []
        for src in re.findall(r'<source[^>]+src="([^"]+)"', webpage):
            ext = determine_ext(src)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
            else:
                # Direct files look like ".../<height>.<ext>"; the height is
                # optional and only used to label the format.
                height = int_or_none(self._search_regex(
                    r'/(\d+)\.%s' % re.escape(ext), src, 'height', default=None))
                formats.append({
                    'format_id': ext + ('-%sp' % height if height else ''),
                    'url': src,
                    'height': height,
                    'ext': ext,
                    'preference': 1,
                })
        self._sort_formats(formats)

        # NOTE: the quoted values below are matched with (?:(?!\1).)+ rather
        # than [^\1]+? -- inside a character class "\1" is the octal escape
        # for chr(1), not a backreference, so it never excluded the closing
        # quote and could run past an empty value into unrelated text.
        title = self._search_regex(
            r'share_title\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
            webpage, 'title', group='title')
        # Allows the "other" quote character inside the value (e.g. an
        # apostrophe inside a double-quoted description).
        description = self._search_regex(
            r'share_description\s*=\s*(["\'])(?P<description>(?:(?!\1).)+)\1',
            webpage, 'description', default=None, group='description')
        # Optional fields fall back to None (not False) so that a missing
        # value is reported as absent rather than as a boolean.
        thumbnail = self._search_regex(
            r'poster\s*=\s*(["\'])(?P<thumbnail>(?:(?!\1).)+)\1',
            webpage, 'thumbnail', default=None, group='thumbnail')

        uploader_id = self._search_regex(
            r'twitter_handle\s*=\s*(["\'])(?P<uploader_id>(?:(?!\1).)+)\1',
            webpage, 'uploader id', fatal=False, group='uploader_id')
        uploader = self._search_regex(
            r'window\.channelName\s*=\s*(["\'])Embedded:(?P<uploader>(?:(?!\1).)+)\1',
            webpage, 'uploader', default=None, group='uploader')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'formats': formats,
        }