]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/goshgay.py
Merge remote-tracking branch 'MikeCol/goshgay'
[yt-dlp.git] / youtube_dl / extractor / goshgay.py
CommitLineData
d6aa1967
M
1# -*- coding: utf-8 -*-
2from __future__ import unicode_literals
3
4import re
5
6from .common import InfoExtractor
7from ..utils import (
8 compat_urlparse,
9 str_to_int,
10 ExtractorError,
11)
12import json
13
14
class GoshgayIE(InfoExtractor):
    """Information extractor for video pages on www.goshgay.com."""

    # The dots in the host name are escaped so that they only match a literal
    # '.'; left unescaped they would accept any character in that position.
    _VALID_URL = r'^(?:https?://)www\.goshgay\.com/video(?P<id>\d+?)($|/)'
    _TEST = {
        'url': 'http://www.goshgay.com/video4116282',
        'md5': '268b9f3c3229105c57859e166dd72b03',
        'info_dict': {
            'id': '4116282',
            'ext': 'flv',
            'title': 'md5:089833a4790b5e103285a07337f245bf',
            # Raw literal: '\.' is not a valid escape in a normal string.
            'thumbnail': r're:http://.*\.jpg',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for a single video page.

        Downloads the page at ``url``, reads the title and the argument of
        the ``jwplayer("player").setup(...)`` call from it, then downloads
        the XML document named by that argument's ``config`` entry to find
        the media URL and, if present, a thumbnail URL.

        Returns a dict with the keys ``id``, ``url``, ``title``, ``width``,
        ``height``, ``thumbnail``, ``http_referer`` and ``age_limit``.

        Raises ExtractorError when the player settings carry no ``config``
        entry, when the XML root element is not ``config``, or when the XML
        does not contain exactly one ``file`` element.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._search_regex(
            r'class="video-title"><h1>(.+?)<', webpage, 'title')

        player_config = self._search_regex(
            r'jwplayer\("player"\)\.setup\(({.+?})\)', webpage,
            'config settings', fatal=True, flags=re.S)
        # Single quotes are swapped for double quotes before JSON parsing;
        # presumably the page writes the settings object with single-quoted
        # strings, which json.loads would reject -- confirm against the site.
        player_vars = json.loads(player_config.replace("'", '"'))
        width = str_to_int(player_vars.get('width'))
        height = str_to_int(player_vars.get('height'))
        config_uri = player_vars.get('config')

        if config_uri is None:
            raise ExtractorError('Missing config URI')
        node = self._download_xml(
            config_uri, video_id, 'Downloading player config XML',
            errnote='Unable to download XML')
        if node is None:
            raise ExtractorError('Missing config XML')
        if node.tag != 'config':
            raise ExtractorError('Missing config attribute')

        fns = node.findall('file')
        imgs = node.findall('image')
        # Exactly one <file> element is required; zero or several are both
        # reported with the same message.
        if len(fns) != 1:
            raise ExtractorError('Missing media URI')
        video_url = fns[0].text
        # The thumbnail is optional: use the first <image> element if any.
        thumbnail = imgs[0].text if imgs else None

        # Referer is the page URL reduced to scheme, host and path (query
        # string and fragment are dropped).
        url_comp = compat_urlparse.urlparse(url)
        ref = "%s://%s%s" % (url_comp.scheme, url_comp.netloc, url_comp.path)

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'width': width,
            'height': height,
            'thumbnail': thumbnail,
            'http_referer': ref,
            'age_limit': 18,
        }