]>
Commit | Line | Data |
---|---|---|
1 | # coding: utf-8 | |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .jwplatform import JWPlatformBaseIE | |
7 | from ..compat import compat_parse_qs | |
8 | from ..utils import ( | |
9 | ExtractorError, | |
10 | parse_duration, | |
11 | ) | |
12 | ||
13 | ||
class SendtoNewsIE(JWPlatformBaseIE):
    """Extractor for videos served by the embed.sendtonews.com player.

    A video is addressed by three query parameters (``SK``, ``MK`` and
    ``PK``); the extracted video id is those three values joined by ``-``.
    """

    _VALID_URL = r'https?://embed\.sendtonews\.com/player/embed\.php\?(?P<query>[^#]+)'

    _TEST = {
        # From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/
        'url': 'http://embed.sendtonews.com/player/embed.php?SK=GxfCe0Zo7D&MK=175909&PK=5588&autoplay=on&sound=yes',
        'info_dict': {
            'id': 'GxfCe0Zo7D-175909-5588',
            'ext': 'mp4',
            'title': 'Recap: CLE 15, CIN 6',
            'description': '5/16/16: Indians\' bats explode for 15 runs in a win',
            'duration': 49,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    # Protocol-relative embed URL; filled with (SK, MK, PK).
    _URL_TEMPLATE = '//embed.sendtonews.com/player/embed.php?SK=%s&MK=%s&PK=%s'

    @classmethod
    def _extract_url(cls, webpage):
        """Find a SendtoNews responsive embed in ``webpage``.

        Looks for a ``<script>`` tag whose ``src`` points at
        ``responsiveembed.php`` and carries an ``SC`` parameter of the form
        ``<SK>-<MK>-<PK>``.

        Returns the matching embed URL built from ``_URL_TEMPLATE``, or
        ``None`` when no such script tag exists or its ``SC`` value does not
        consist of exactly three non-empty dash-separated components.
        """
        mobj = re.search(r'''(?x)<script[^>]+src=([\'"])
            (?:https?:)?//embed\.sendtonews\.com/player/responsiveembed\.php\?
                .*\bSC=(?P<SC>[0-9a-zA-Z-]+).*
            \1>''', webpage)
        if not mobj:
            return None

        # The character class above admits any number of dashes, so the SC
        # value is not guaranteed to split into three parts. Treat a
        # malformed value as "no embed found" instead of letting a
        # ValueError from tuple unpacking escape to the caller.
        parts = mobj.group('SC').split('-')
        if len(parts) != 3 or not all(parts):
            return None
        return cls._URL_TEMPLATE % tuple(parts)

    def _real_extract(self, url):
        """Extract the info dict for a SendtoNews embed URL.

        Raises ExtractorError (expected) when the URL's query string lacks
        any of the ``SK``, ``MK`` or ``PK`` parameters.

        Returns the dict produced by ``_parse_jwplayer_data`` with
        ``title``, ``description`` and ``duration`` overridden by values
        scraped from the embed page.
        """
        mobj = re.match(self._VALID_URL, url)
        # Each parsed query value is a list; only the first entry is used.
        params = compat_parse_qs(mobj.group('query'))

        if 'SK' not in params or 'MK' not in params or 'PK' not in params:
            raise ExtractorError('Invalid URL', expected=True)

        video_id = '-'.join([params['SK'][0], params['MK'][0], params['PK'][0]])

        webpage = self._download_webpage(url, video_id)

        # Argument text of the jwplayer("...").setup(...) call in the page.
        jwplayer_data_str = self._search_regex(
            r'jwplayer\("[^"]+"\)\.setup\((.+?)\);', webpage, 'JWPlayer data')

        # The setup argument uses these bare JavaScript identifiers as
        # values (":w," etc.). Substitute literal values for them before the
        # text is handed to _parse_json below.
        js_vars = {
            'w': 1024,
            'h': 768,
            'modeVar': 'html5',
        }
        for name, val in js_vars.items():
            # Integers are inserted bare, everything else as a quoted string.
            js_val = '%d' % val if isinstance(val, int) else '"%s"' % val
            jwplayer_data_str = jwplayer_data_str.replace(':%s,' % name, ':%s,' % js_val)

        info_dict = self._parse_jwplayer_data(
            self._parse_json(jwplayer_data_str, video_id),
            video_id, require_title=False, rtmp_params={'no_resume': True})

        # The title is not required from the player data (require_title=False
        # above); it is scraped from the page markup instead. The description
        # and duration lookups are explicitly non-fatal.
        title = self._html_search_regex(
            r'<div[^>]+class="embedTitle">([^<]+)</div>', webpage, 'title')
        description = self._html_search_regex(
            r'<div[^>]+class="embedSubTitle">([^<]+)</div>', webpage,
            'description', fatal=False)
        duration = parse_duration(self._html_search_regex(
            r'<div[^>]+class="embedDetails">([0-9:]+)', webpage,
            'duration', fatal=False))

        info_dict.update({
            'title': title,
            'description': description,
            'duration': duration,
        })

        return info_dict