]>
Commit | Line | Data |
---|---|---|
e42a692f PH |
1 | from __future__ import unicode_literals |
2 | ||
43518503 | 3 | from .theplatform import ThePlatformFeedIE |
5c2266df | 4 | from ..utils import ( |
21dedcb5 | 5 | ExtractorError, |
63c55e9f | 6 | int_or_none, |
63c55e9f | 7 | find_xpath_attr, |
45cae3b0 RA |
8 | xpath_element, |
9 | xpath_text, | |
10 | update_url_query, | |
5c2266df | 11 | ) |
fa3ae234 PH |
12 | |
13 | ||
class CBSBaseIE(ThePlatformFeedIE):
    """Base extractor adding CBS caption handling on top of ThePlatformFeedIE."""

    def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
        """Collect caption tracks advertised as ``<param>`` elements in a SMIL tree.

        Three parameter names are probed, in a fixed order; each one that is
        present with a non-empty ``value`` attribute contributes one track.

        :param smil: parsed SMIL XML element to search.
        :param namespace: optional XML namespace applied to the ``param`` path.
        :param subtitles_lang: language key under which the tracks are filed.
        :return: ``{subtitles_lang: [{'ext': ..., 'url': ...}, ...]}``, or an
            empty dict when no caption URL was found.
        """
        # (SMIL param name, subtitle extension) pairs; order defines track order.
        caption_params = (
            ('sMPTE-TTCCURL', 'tt'),
            ('ClosedCaptionURL', 'ttml'),
            ('webVTTCaptionURL', 'vtt'),
        )

        tracks = []
        for param_name, extension in caption_params:
            param_el = find_xpath_attr(
                smil, self._xpath_ns('.//param', namespace), 'name', param_name)
            if param_el is None:
                continue
            caption_url = param_el.get('value')
            if not caption_url:
                continue
            tracks.append({
                'ext': extension,
                'url': caption_url,
            })

        # Only expose the language key when at least one track exists.
        if not tracks:
            return {}
        return {subtitles_lang: tracks}
3e0c3d14 | 27 | |
28 | ||
class CBSIE(CBSBaseIE):
    """Extractor for cbs.com show videos and colbertlateshow.com videos/podcasts.

    Also accepts the internal ``cbs:<content id>`` pseudo-URL form.
    """

    _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
        'info_dict': {
            'id': '_u7W953k6la293J7EPTd9oHkSPs6Xn6_',
            'ext': 'mp4',
            'title': 'Connect Chat feat. Garth Brooks',
            'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
            'duration': 1495,
            'timestamp': 1385585425,
            'upload_date': '20131127',
            'uploader': 'CBSI-NEW',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        '_skip': 'Blocked outside the US',
    }, {
        'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
        'only_matching': True,
    }, {
        'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
        'only_matching': True,
    }]

    def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
        """Build the info dict for one piece of CBS content.

        Queries the CBS player service for the content's ``<item>`` entries,
        then fetches one ThePlatform SMIL document per distinct asset type and
        merges the resulting formats and subtitles.

        :param content_id: content identifier taken from the URL.
        :param site: value sent as the ``partner`` query parameter.
        :param mpx_acc: numeric account id embedded in the ThePlatform path.
        :return: info dict (ThePlatform metadata overlaid with CBS fields).
        :raises ExtractorError: if the service response contains no ``<item>``,
            if the title is missing, or if every SMIL request failed and no
            format could be extracted (the last failure is re-raised).
        """
        items_data = self._download_xml(
            'http://can.cbs.com/thunder/player/videoPlayerService.php',
            content_id, query={'partner': site, 'contentId': content_id})
        # Fail with a clear ExtractorError instead of an AttributeError on
        # None when the service answers without any <item> element.
        video_data = xpath_element(items_data, './/item', 'item', fatal=True)
        title = xpath_text(video_data, 'videoTitle', 'title', True)
        tp_path = 'dJ5BDC/media/guid/%d/%s' % (mpx_acc, content_id)
        tp_release_url = 'http://link.theplatform.com/s/' + tp_path

        asset_types = []
        subtitles = {}
        formats = []
        last_e = None
        for item in items_data.findall('.//item'):
            asset_type = xpath_text(item, 'assetType')
            # Skip items without an asset type, asset types already handled,
            # and the HLS_FPS / DASH_CENC variants.
            # NOTE(review): presumably those two are DRM-protected - confirm.
            if not asset_type or asset_type in asset_types or 'HLS_FPS' in asset_type or 'DASH_CENC' in asset_type:
                continue
            asset_types.append(asset_type)
            query = {
                'mbr': 'true',
                'assetTypes': asset_type,
            }
            # Request the container formats matching the delivery type.
            if asset_type.startswith('HLS') or asset_type in ('OnceURL', 'StreamPack'):
                query['formats'] = 'MPEG4,M3U'
            elif asset_type in ('RTMP', 'WIFI', '3G'):
                query['formats'] = 'MPEG4,FLV'
            try:
                tp_formats, tp_subtitles = self._extract_theplatform_smil(
                    update_url_query(tp_release_url, query), content_id,
                    'Downloading %s SMIL data' % asset_type)
            except ExtractorError as e:
                # Best effort: remember the failure and try the next asset
                # type; it is only surfaced if nothing at all was extracted.
                last_e = e
                continue
            formats.extend(tp_formats)
            subtitles = self._merge_subtitles(subtitles, tp_subtitles)
        if last_e and not formats:
            raise last_e
        self._sort_formats(formats)

        info = self._extract_theplatform_metadata(tp_path, content_id)
        # CBS-provided fields take precedence over ThePlatform metadata.
        info.update({
            'id': content_id,
            'title': title,
            'series': xpath_text(video_data, 'seriesTitle'),
            'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
            'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
            # NOTE(review): 1000 is presumably a ms -> s scale; confirm.
            'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
            'thumbnail': xpath_text(video_data, 'previewImageURL'),
            'formats': formats,
            'subtitles': subtitles,
        })
        return info

    def _real_extract(self, url):
        """Extract the content id from ``url`` and return its info dict."""
        content_id = self._match_id(url)
        return self._extract_video_info(content_id)