]>
Commit | Line | Data |
---|---|---|
43518503 | 1 | from .theplatform import ThePlatformFeedIE |
5c2266df | 2 | from ..utils import ( |
21dedcb5 | 3 | ExtractorError, |
63c55e9f | 4 | int_or_none, |
63c55e9f | 5 | find_xpath_attr, |
45cae3b0 RA |
6 | xpath_element, |
7 | xpath_text, | |
8 | update_url_query, | |
6e6e0d95 | 9 | url_or_none, |
5c2266df | 10 | ) |
fa3ae234 PH |
11 | |
12 | ||
class CBSBaseIE(ThePlatformFeedIE):
    """Shared plumbing for CBS extractors that resolve media through thePlatform.

    Subclasses implement ``_extract_video_info`` to gather asset types and
    site-specific metadata, then delegate to ``_extract_common_video_info``.
    """

    def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
        """Collect caption URLs exposed as ``<param>`` elements of a SMIL document.

        Each known caption parameter name is looked up by its ``name``
        attribute; when the element exists and has a non-empty ``value``,
        that value is recorded as a subtitle URL under ``subtitles_lang``.

        Returns a dict mapping ``subtitles_lang`` to a list of
        ``{'ext': ..., 'url': ...}`` entries (empty dict if nothing was found).
        """
        subtitles = {}
        param_xpath = self._xpath_ns('.//param', namespace)
        for param_name, ext in (
                ('sMPTE-TTCCURL', 'tt'),
                ('ClosedCaptionURL', 'ttml'),
                ('webVTTCaptionURL', 'vtt')):
            cc_e = find_xpath_attr(smil, param_xpath, 'name', param_name)
            if cc_e is None:
                continue
            cc_url = cc_e.get('value')
            if cc_url:
                subtitles.setdefault(subtitles_lang, []).append({
                    'ext': ext,
                    'url': cc_url,
                })
        return subtitles

    def _extract_common_video_info(self, content_id, asset_types, mpx_acc, extra_info):
        """Fetch formats/subtitles for every asset type and merge them with metadata.

        ``asset_types`` maps an asset-type label to the query dict appended to
        the thePlatform release URL for that asset type. ``mpx_acc`` is
        formatted with ``%d`` into the release path, so it must be an integer.
        ``extra_info`` holds additional fields; entries whose value is ``None``
        are dropped, the rest override the thePlatform metadata.

        Neither ``asset_types`` nor ``extra_info`` is modified.

        Returns the merged info dict, including ``id``, ``formats`` and
        ``subtitles``.
        """
        tp_path = 'dJ5BDC/media/guid/%d/%s' % (mpx_acc, content_id)
        tp_release_url = f'https://link.theplatform.com/s/{tp_path}'
        info = self._extract_theplatform_metadata(tp_path, content_id)

        formats, subtitles = [], {}
        last_e = None
        for asset_type, query in asset_types.items():
            try:
                tp_formats, tp_subtitles = self._extract_theplatform_smil(
                    update_url_query(tp_release_url, query), content_id,
                    'Downloading %s SMIL data' % asset_type)
            except ExtractorError as e:
                last_e = e
                if asset_type != 'fallback':
                    continue
                # Blank "formats" query to check if expired. Work on a copy so
                # the caller's query dict is left exactly as it was passed in.
                retry_query = {**query, 'formats': ''}
                try:
                    tp_formats, tp_subtitles = self._extract_theplatform_smil(
                        update_url_query(tp_release_url, retry_query), content_id,
                        'Downloading %s SMIL data, trying again with another format' % asset_type)
                except ExtractorError as retry_e:
                    last_e = retry_e
                    continue
            formats.extend(tp_formats)
            subtitles = self._merge_subtitles(subtitles, tp_subtitles)
        if last_e and not formats:
            # Nothing usable was extracted; hand the most recent failure to
            # the inherited no-formats reporting helper.
            self.raise_no_formats(last_e, True, content_id)
        self._sort_formats(formats)

        # Build a new dict instead of updating ``extra_info`` in place.
        merged_info = {
            **extra_info,
            'id': content_id,
            'formats': formats,
            'subtitles': subtitles,
        }
        info.update({k: v for k, v in merged_info.items() if v is not None})
        return info

    def _extract_video_info(self, *args, **kwargs):
        """Extract assets + metadata and call ``_extract_common_video_info``.

        Raises NotImplementedError here; subclasses must override it.
        """
        raise NotImplementedError('This method must be implemented by subclasses')

    def _real_extract(self, url):
        """Match the content id in ``url`` and pass it to ``_extract_video_info``."""
        return self._extract_video_info(self._match_id(url))
71 | ||
3e0c3d14 | 72 | |
class CBSIE(CBSBaseIE):
    """Extractor for cbs.com show/movie pages, colbertlateshow.com and ``cbs:`` ids."""

    _VALID_URL = r'''(?x)
        (?:
            cbs:|
            https?://(?:www\.)?(?:
                cbs\.com/(?:shows|movies)/(?:video|[^/]+/video|[^/]+)/|
                colbertlateshow\.com/(?:video|podcasts)/)
        )(?P<id>[\w-]+)'''

    # All tests are blocked outside US
    _TESTS = [{
        'url': 'https://www.cbs.com/shows/video/xrUyNLtl9wd8D_RWWAg9NU2F_V6QpB3R/',
        'info_dict': {
            'id': 'xrUyNLtl9wd8D_RWWAg9NU2F_V6QpB3R',
            'ext': 'mp4',
            'title': 'Tough As Nails - Dreams Never Die',
            'description': 'md5:a3535a62531cdd52b0364248a2c1ae33',
            'duration': 2588,
            'timestamp': 1639015200,
            'upload_date': '20211209',
            'uploader': 'CBSI-NEW',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://www.cbs.com/shows/video/sZH1MGgomIosZgxGJ1l263MFq16oMtW1/',
        'info_dict': {
            'id': 'sZH1MGgomIosZgxGJ1l263MFq16oMtW1',
            'title': 'The Late Show - 3/16/22 (Michael Buble, Rose Matafeo)',
            'timestamp': 1647488100,
            'description': 'md5:d0e6ec23c544b7fa8e39a8e6844d2439',
            'uploader': 'CBSI-NEW',
            'upload_date': '20220317',
        },
        'params': {
            'ignore_no_formats_error': True,
            'skip_download': True,
        },
        'expected_warnings': [
            'This content expired on', 'No video formats found', 'Requested format is not available'],
    }, {
        'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
        'only_matching': True,
    }, {
        'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
        'only_matching': True,
    }]

    def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
        """Query the CBS player service and build the info dict for ``content_id``.

        ``site`` is sent as the ``partner`` query parameter and ``mpx_acc`` is
        forwarded to ``_extract_common_video_info``. Every ``<item>`` in the
        response contributes one asset type; duplicates and excluded asset
        types (``HLS_FPS``, ``DASH_CENC``, ``OnceURL``) are skipped.

        Raises ExtractorError if the response contains no ``<item>`` element.
        """
        items_data = self._download_xml(
            'https://can.cbs.com/thunder/player/videoPlayerService.php',
            content_id, query={'partner': site, 'contentId': content_id})
        # Without an <item> there is no metadata to read; fail with a proper
        # extractor error instead of an AttributeError on None further down.
        video_data = xpath_element(items_data, './/item', fatal=True)
        title = xpath_text(video_data, 'videoTitle', 'title') or xpath_text(video_data, 'videotitle', 'title')

        asset_types = {}
        has_drm = False
        for item in items_data.findall('.//item'):
            asset_type = xpath_text(item, 'assetType')
            query = {'mbr': 'true'}
            if asset_type:
                query['assetTypes'] = asset_type
            else:
                # fallback for content_ids that videoPlayerService doesn't return anything for
                asset_type = 'fallback'
                query['formats'] = 'M3U+none,MPEG4,M3U+appleHlsEncryption,MP3'

            if asset_type in asset_types:
                continue
            if any(excluded in asset_type for excluded in ('HLS_FPS', 'DASH_CENC', 'OnceURL')):
                # Remember that a DASH_CENC asset was seen so it can be
                # reported if no other asset type remains.
                if 'DASH_CENC' in asset_type:
                    has_drm = True
                continue

            if asset_type.startswith('HLS') or 'StreamPack' in asset_type:
                query['formats'] = 'MPEG4,M3U'
            elif asset_type in ('RTMP', 'WIFI', '3G'):
                query['formats'] = 'MPEG4,FLV'
            asset_types[asset_type] = query

        if not asset_types and has_drm:
            self.report_drm(content_id)

        return self._extract_common_video_info(content_id, asset_types, mpx_acc, extra_info={
            'title': title,
            'series': xpath_text(video_data, 'seriesTitle'),
            'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
            'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
            'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
            'thumbnail': url_or_none(xpath_text(video_data, 'previewImageURL')),
        })