]>
Commit | Line | Data |
---|---|---|
5ce3d5bd YCH |
1 | import re |
2 | ||
a4a554a7 | 3 | from .common import InfoExtractor |
5ce3d5bd | 4 | from ..utils import ( |
5c2d0872 YCH |
5 | float_or_none, |
6 | parse_iso8601, | |
7 | update_url_query, | |
e5dfdc81 RA |
8 | int_or_none, |
9 | determine_protocol, | |
10 | unescapeHTML, | |
5ce3d5bd YCH |
11 | ) |
12 | ||
13 | ||
class SendtoNewsIE(InfoExtractor):
    # Extractor for SendtoNews "player2" embed URLs; each URL is returned as
    # a playlist built from the player's JSON data endpoint.

    # The extractor is currently flagged as not working.
    _WORKING = False
    # The value of the ``SC`` query parameter doubles as the playlist id.
    _VALID_URL = r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P<id>[0-9A-Za-z-]+)'

    _TEST = {
        # From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/
        'url': 'http://embed.sendtonews.com/player2/embedplayer.php?SC=GxfCe0Zo7D-175909-5588&type=single&autoplay=on&sound=YES',
        'info_dict': {
            'id': 'GxfCe0Zo7D-175909-5588',
        },
        'playlist_count': 8,
        # test the first video only to prevent lengthy tests
        'playlist': [{
            'info_dict': {
                'id': '240385',
                'ext': 'mp4',
                'title': 'Indians introduce Encarnacion',
                'description': 'Indians president of baseball operations Chris Antonetti and Edwin Encarnacion discuss the slugger\'s three-year contract with Cleveland',
                'duration': 137.898,
                'thumbnail': r're:https?://.*\.jpg$',
                'upload_date': '20170105',
                'timestamp': 1483649762,
            },
        }],
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    # Protocol-relative embed URL; ``%s`` is filled with the ``SC`` value.
    _URL_TEMPLATE = '//embed.sendtonews.com/player2/embedplayer.php?SC=%s'

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield the player2 embed URL for the first responsive embed script.

        Searches ``webpage`` for a ``<script>`` tag whose ``src`` points at
        ``responsiveembed.php`` and carries an ``SC`` parameter. Yields at
        most one URL (built from ``_URL_TEMPLATE``); yields nothing when no
        such tag is found. ``url`` is not used here.
        """
        match = re.search(r'''(?x)<script[^>]+src=([\'"])
            (?:https?:)?//embed\.sendtonews\.com/player/responsiveembed\.php\?
                .*\bSC=(?P<SC>[0-9a-zA-Z-]+).*
            \1>''', webpage)
        if match is None:
            return
        yield cls._URL_TEMPLATE % match.group('SC')

    def _real_extract(self, url):
        """Download the playlist JSON for ``url`` and return a playlist result.

        The data endpoint is derived from the embed URL by swapping
        ``embedplayer.php`` for ``data_read.php`` and adding
        ``cmd=loadInitial``. Each item of ``playlistData[0]`` becomes one
        playlist entry.
        """
        playlist_id = self._match_id(url)

        endpoint = url.replace('embedplayer.php', 'data_read.php')
        playlist_data = self._download_json(
            update_url_query(endpoint, {'cmd': 'loadInitial'}), playlist_id)

        # (thumbnail id, key in the video dict) pairs, in output order.
        thumbnail_sources = (
            ('normal', 'thumbnailUrl'),
            ('small', 'smThumbnailUrl'),
        )

        entries = []
        for video in playlist_data['playlistData'][0]:
            entry = self._parse_jwplayer_data(
                video['jwconfiguration'],
                require_title=False, m3u8_id='hls', rtmp_params={'no_resume': True})

            for fmt in entry['formats']:
                # Formats that already carry a bitrate are left alone.
                if fmt.get('tbr'):
                    continue
                # Otherwise try to read the bitrate from a "/<digits>k/"
                # path segment of the format URL.
                bitrate = int_or_none(self._search_regex(
                    r'/(\d+)k/', fmt['url'], 'bitrate', default=None))
                if bitrate:
                    fmt.update({
                        'format_id': '%s-%d' % (determine_protocol(fmt), bitrate),
                        'tbr': bitrate,
                    })

            thumbnails = [
                {'id': thumb_id, 'url': video[key]}
                for thumb_id, key in thumbnail_sources
                if video.get(key)
            ]

            entry.update({
                'title': video['S_headLine'].strip(),
                'description': unescapeHTML(video.get('S_fullStory')),
                'thumbnails': thumbnails,
                'duration': float_or_none(video.get('SM_length')),
                'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '),
                # 'tbr' was explicitly set to be preferred over 'height' originally,
                # So this is being kept unless someone can confirm this is unnecessary
                '_format_sort_fields': ('tbr', 'res'),
            })
            entries.append(entry)

        return self.playlist_result(entries, playlist_id)