]>
Commit | Line | Data |
---|---|---|
5ce3d5bd YCH |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
a4a554a7 | 6 | from .common import InfoExtractor |
5ce3d5bd | 7 | from ..utils import ( |
5c2d0872 YCH |
8 | float_or_none, |
9 | parse_iso8601, | |
10 | update_url_query, | |
e5dfdc81 RA |
11 | int_or_none, |
12 | determine_protocol, | |
13 | unescapeHTML, | |
5ce3d5bd YCH |
14 | ) |
15 | ||
16 | ||
class SendtoNewsIE(InfoExtractor):
    """Extractor for playlists served by the SendtoNews embedded player."""

    _VALID_URL = r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P<id>[0-9A-Za-z-]+)'

    _TEST = {
        # From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/
        'url': 'http://embed.sendtonews.com/player2/embedplayer.php?SC=GxfCe0Zo7D-175909-5588&type=single&autoplay=on&sound=YES',
        'info_dict': {
            'id': 'GxfCe0Zo7D-175909-5588'
        },
        'playlist_count': 8,
        # test the first video only to prevent lengthy tests
        'playlist': [{
            'info_dict': {
                'id': '240385',
                'ext': 'mp4',
                'title': 'Indians introduce Encarnacion',
                'description': 'Indians president of baseball operations Chris Antonetti and Edwin Encarnacion discuss the slugger\'s three-year contract with Cleveland',
                'duration': 137.898,
                'thumbnail': r're:https?://.*\.jpg$',
                'upload_date': '20170105',
                'timestamp': 1483649762,
            },
        }],
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    # Protocol-relative player URL; ``%s`` receives the SC identifier.
    _URL_TEMPLATE = '//embed.sendtonews.com/player2/embedplayer.php?SC=%s'

    @classmethod
    def _extract_url(cls, webpage):
        """Return the player URL for a responsive-embed script found in *webpage*.

        Searches *webpage* for a ``<script>`` tag whose ``src`` points at
        ``responsiveembed.php`` and carries an ``SC`` query parameter, and
        formats that value into ``_URL_TEMPLATE``.  Returns ``None`` when no
        such tag is found.
        """
        embed_match = re.search(
            r'''(?x)<script[^>]+src=([\'"])
                (?:https?:)?//embed\.sendtonews\.com/player/responsiveembed\.php\?
                    .*\bSC=(?P<SC>[0-9a-zA-Z-]+).*
                \1>''',
            webpage)
        if embed_match is None:
            return None
        return cls._URL_TEMPLATE % embed_match.group('SC')

    def _real_extract(self, url):
        """Build a playlist result from the player's ``loadInitial`` JSON data."""
        playlist_id = self._match_id(url)

        # The data endpoint is the player URL with the script name swapped
        # and a ``cmd=loadInitial`` query parameter added.
        initial_data_url = update_url_query(
            url.replace('embedplayer.php', 'data_read.php'),
            {'cmd': 'loadInitial'})
        initial_data = self._download_json(initial_data_url, playlist_id)

        # (thumbnail id, key in the video record) pairs, in output order.
        thumbnail_sources = (
            ('normal', 'thumbnailUrl'),
            ('small', 'smThumbnailUrl'),
        )

        entries = []
        for video in initial_data['playlistData'][0]:
            entry = self._parse_jwplayer_data(
                video['jwconfiguration'],
                require_title=False, m3u8_id='hls', rtmp_params={'no_resume': True})

            formats = entry['formats']
            for fmt in formats:
                # Only formats without a bitrate get one derived from the
                # "/<digits>k/" segment of their URL, when present.
                if not fmt.get('tbr'):
                    bitrate = int_or_none(self._search_regex(
                        r'/(\d+)k/', fmt['url'], 'bitrate', default=None))
                    if bitrate:
                        fmt.update({
                            'format_id': '%s-%d' % (determine_protocol(fmt), bitrate),
                            'tbr': bitrate,
                        })
            self._sort_formats(formats, ('tbr', 'height', 'width', 'format_id'))

            thumbnails = [
                {'id': thumbnail_id, 'url': video[source_key]}
                for thumbnail_id, source_key in thumbnail_sources
                if video.get(source_key)
            ]

            # Fields taken from the video record are written over whatever
            # the jwplayer parse put under the same keys.
            entry.update({
                'title': video['S_headLine'].strip(),
                'description': unescapeHTML(video.get('S_fullStory')),
                'thumbnails': thumbnails,
                'duration': float_or_none(video.get('SM_length')),
                'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '),
            })
            entries.append(entry)

        return self.playlist_result(entries, playlist_id)
105 | return self.playlist_result(entries, playlist_id) |