]>
Commit | Line | Data |
---|---|---|
5ce3d5bd YCH |
1 | import re |
2 | ||
a4a554a7 | 3 | from .common import InfoExtractor |
5ce3d5bd | 4 | from ..utils import ( |
5c2d0872 YCH |
5 | float_or_none, |
6 | parse_iso8601, | |
7 | update_url_query, | |
e5dfdc81 RA |
8 | int_or_none, |
9 | determine_protocol, | |
10 | unescapeHTML, | |
5ce3d5bd YCH |
11 | ) |
12 | ||
13 | ||
class SendtoNewsIE(InfoExtractor):
    """Extractor for SendtoNews ``player2/embedplayer.php`` playlist embeds.

    The ``SC`` query parameter of the embed URL is used as the playlist id.
    """

    _VALID_URL = r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P<id>[0-9A-Za-z-]+)'

    _TEST = {
        # From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/
        'url': 'http://embed.sendtonews.com/player2/embedplayer.php?SC=GxfCe0Zo7D-175909-5588&type=single&autoplay=on&sound=YES',
        'info_dict': {
            'id': 'GxfCe0Zo7D-175909-5588'
        },
        'playlist_count': 8,
        # test the first video only to prevent lengthy tests
        'playlist': [{
            'info_dict': {
                'id': '240385',
                'ext': 'mp4',
                'title': 'Indians introduce Encarnacion',
                'description': 'Indians president of baseball operations Chris Antonetti and Edwin Encarnacion discuss the slugger\'s three-year contract with Cleveland',
                'duration': 137.898,
                'thumbnail': r're:https?://.*\.jpg$',
                'upload_date': '20170105',
                'timestamp': 1483649762,
            },
        }],
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    # Protocol-relative player URL; '%s' is filled with the SC identifier.
    _URL_TEMPLATE = '//embed.sendtonews.com/player2/embedplayer.php?SC=%s'

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield the player URL for the first SendtoNews embed in *webpage*.

        Looks for a ``<script>`` tag whose ``src`` points at
        ``responsiveembed.php`` and carries an ``SC`` parameter. Yields
        nothing when no such tag is present.
        """
        embed_match = re.search(r'''(?x)<script[^>]+src=([\'"])
            (?:https?:)?//embed\.sendtonews\.com/player/responsiveembed\.php\?
            .*\bSC=(?P<SC>[0-9a-zA-Z-]+).*
            \1>''', webpage)
        if not embed_match:
            return
        yield cls._URL_TEMPLATE % embed_match.group('SC')

    def _real_extract(self, url):
        """Return a playlist result built from the player's JSON playlist data.

        The playlist JSON is fetched from the ``data_read.php`` counterpart of
        the embed URL (with ``cmd=loadInitial``). Each item of the first
        ``playlistData`` list becomes one entry.
        """
        playlist_id = self._match_id(url)

        playlist_data = self._download_json(
            update_url_query(
                url.replace('embedplayer.php', 'data_read.php'),
                {'cmd': 'loadInitial'}),
            playlist_id)

        entries = []
        for video in playlist_data['playlistData'][0]:
            entry = self._parse_jwplayer_data(
                video['jwconfiguration'],
                require_title=False, m3u8_id='hls', rtmp_params={'no_resume': True})

            # Formats without a bitrate get one from a "/<digits>k/" path
            # segment of their URL, when such a segment exists.
            for fmt in entry['formats']:
                if fmt.get('tbr'):
                    continue
                bitrate = int_or_none(self._search_regex(
                    r'/(\d+)k/', fmt['url'], 'bitrate', default=None))
                if bitrate:
                    fmt.update({
                        'format_id': '%s-%d' % (determine_protocol(fmt), bitrate),
                        'tbr': bitrate,
                    })
            # 'tbr' was explicitly preferred over 'height' originally; this
            # ordering is kept until someone confirms it is unnecessary.
            self._sort_formats(entry['formats'], ('tbr', 'res'))

            # Only thumbnails whose URL field is present and truthy are listed.
            thumbnails = [
                {'id': thumbnail_id, 'url': video[url_key]}
                for thumbnail_id, url_key in (
                    ('normal', 'thumbnailUrl'),
                    ('small', 'smThumbnailUrl'))
                if video.get(url_key)
            ]

            entry.update({
                'title': video['S_headLine'].strip(),
                'description': unescapeHTML(video.get('S_fullStory')),
                'thumbnails': thumbnails,
                'duration': float_or_none(video.get('SM_length')),
                'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '),
            })
            entries.append(entry)

        return self.playlist_result(entries, playlist_id)