]>
Commit | Line | Data |
---|---|---|
d6aa1967 M |
1 | # -*- coding: utf-8 -*- |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..utils import ( | |
8 | compat_urlparse, | |
9 | str_to_int, | |
10 | ExtractorError, | |
11 | ) | |
12 | import json | |
13 | ||
14 | ||
class GoshgayIE(InfoExtractor):
    """Extractor for video pages hosted on www.goshgay.com."""

    # The dots of the host name are escaped so that they only match a
    # literal '.', not an arbitrary character.
    _VALID_URL = r'^(?:https?://)www\.goshgay\.com/video(?P<id>\d+?)($|/)'
    _TEST = {
        'url': 'http://www.goshgay.com/video4116282',
        'md5': '268b9f3c3229105c57859e166dd72b03',
        'info_dict': {
            'id': '4116282',
            'ext': 'flv',
            'title': 'md5:089833a4790b5e103285a07337f245bf',
            # Raw string: the backslash belongs to the regex, it is not a
            # string escape sequence.
            'thumbnail': r're:http://.*\.jpg',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video page at ``url``.

        The page is downloaded, the title and the argument object of the
        ``jwplayer("player").setup(...)`` call are scraped from it, and the
        XML document referenced by that object's ``config`` entry is
        fetched to obtain the media URL and an optional thumbnail.

        Returns a dict with the keys ``id``, ``url``, ``title``, ``width``,
        ``height``, ``thumbnail``, ``http_referer`` and ``age_limit``.

        Raises ExtractorError when the player config cannot be parsed, has
        no ``config`` entry, when the XML root is not ``<config>``, or when
        the XML does not contain exactly one ``<file>`` element.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._search_regex(
            r'class="video-title"><h1>(.+?)<', webpage, 'title')

        player_config = self._search_regex(
            r'jwplayer\("player"\)\.setup\(({.+?})\)', webpage,
            'config settings', fatal=True, flags=re.S)
        # Single quotes are swapped for double quotes before parsing;
        # presumably the page writes the object with single-quoted strings,
        # which JSON does not accept.
        try:
            player_vars = json.loads(player_config.replace("'", '"'))
        except ValueError as err:
            # Report malformed configs the same way as every other failure
            # in this method instead of leaking a bare ValueError.
            raise ExtractorError('Unable to parse player config: %s' % err)

        width = str_to_int(player_vars.get('width'))
        height = str_to_int(player_vars.get('height'))
        config_uri = player_vars.get('config')
        if config_uri is None:
            raise ExtractorError('Missing config URI')

        node = self._download_xml(
            config_uri, video_id, 'Downloading player config XML',
            errnote='Unable to download XML')
        if node is None:
            raise ExtractorError('Missing config XML')
        if node.tag != 'config':
            raise ExtractorError('Missing config attribute')

        fns = node.findall('file')
        imgs = node.findall('image')
        # Exactly one <file> element is required; zero or several are both
        # rejected.
        if len(fns) != 1:
            raise ExtractorError('Missing media URI')
        video_url = fns[0].text
        # The thumbnail is optional: use the first <image> when present.
        thumbnail = imgs[0].text if imgs else None

        # The referer is the page URL reduced to scheme, host and path
        # (params, query string and fragment are dropped).
        url_comp = compat_urlparse.urlparse(url)
        ref = "%s://%s%s" % (url_comp.scheme, url_comp.netloc, url_comp.path)

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'width': width,
            'height': height,
            'thumbnail': thumbnail,
            'http_referer': ref,
            'age_limit': 18,
        }