]>
Commit | Line | Data |
---|---|---|
1 | import re | |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..compat import ( | |
5 | compat_urllib_parse_unquote, | |
6 | compat_urllib_parse_urlparse, | |
7 | ) | |
8 | from ..utils import ( | |
9 | ExtractorError, | |
10 | float_or_none, | |
11 | sanitized_Request, | |
12 | str_or_none, | |
13 | traverse_obj, | |
14 | urlencode_postdata, | |
15 | USER_AGENTS, | |
16 | ) | |
17 | ||
18 | ||
class CeskaTelevizeIE(InfoExtractor):
    """Information extractor for ceskatelevize.cz.

    Matches URLs under ``/ivysilani``, ``/porady`` and ``/zive`` (see
    ``_VALID_URL``) and returns either a single video entry or a playlist
    of entries, depending on how many items the site's playlist contains.
    """

    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
    _TESTS = [{
        'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
        'info_dict': {
            'id': '61924494877028507',
            'ext': 'mp4',
            'title': 'Bonus 01 - En - Hyde Park Civilizace',
            'description': 'English Subtittles',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 81.3,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # live stream
        'url': 'http://www.ceskatelevize.cz/zive/ct1/',
        'info_dict': {
            'id': '102',
            'ext': 'mp4',
            'title': r'ČT1 - živé vysílání online',
            'description': 'Sledujte živé vysílání kanálu ČT1 online. Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.',
            'is_live': True,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # another
        'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
        'only_matching': True,
        'info_dict': {
            'id': 402,
            'ext': 'mp4',
            'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'is_live': True,
        },
        # 'skip': 'Georestricted to Czech Republic',
    }, {
        'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
        'only_matching': True,
    }, {
        # video with 18+ caution trailer
        'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
        'info_dict': {
            'id': '215562210900007-bogotart',
            'title': 'Bogotart - Queer',
            'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti',
        },
        'playlist': [{
            'info_dict': {
                'id': '61924494877311053',
                'ext': 'mp4',
                'title': 'Bogotart - Queer (Varování 18+)',
                'duration': 11.9,
            },
        }, {
            'info_dict': {
                'id': '61924494877068022',
                'ext': 'mp4',
                'title': 'Bogotart - Queer (Queer)',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 1558.3,
            },
        }],
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # iframe embed
        'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract the video entry (or playlist of entries) for ``url``.

        The page is downloaded first. For ``/porady`` and ``/zive`` pages
        the IDEC (or bonus) id is read from the Next.js page data and the
        embedded iframe player page is fetched in place of the original
        page. The player page names the playlist type/id, which is posted
        to the ``get-client-playlist`` endpoint to obtain the playlist
        JSON with the stream URLs.

        Returns the single entry dict when exactly one entry was built,
        otherwise the result of ``playlist_result``.

        Raises ExtractorError when no IDEC id can be found, when the
        player page reports that the video does not exist, or when the
        playlist endpoint answers ``error_region``. Geo restriction found
        on the player page is reported through ``raise_geo_restricted``.
        """
        playlist_id = self._match_id(url)
        webpage, urlh = self._download_webpage_handle(url, playlist_id)
        # Use the final URL (after any redirect) to decide how to proceed.
        parsed_url = compat_urllib_parse_urlparse(urlh.geturl())
        site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
        playlist_title = self._og_search_title(webpage, default=None)
        if site_name and playlist_title:
            # Strip the trailing "<separator> <site name>" from the title.
            # site_name comes from page metadata, so escape it before
            # interpolating it into the pattern.
            playlist_title = re.split(
                r'\s*[—|]\s*%s' % (re.escape(site_name), ),
                playlist_title, maxsplit=1)[0]
        playlist_description = self._og_search_description(webpage, default=None)
        if playlist_description:
            playlist_description = playlist_description.replace('\xa0', ' ')

        type_ = 'IDEC'
        # Pre-initialised so the "not found" error below can reference it
        # even when the branch that looks it up is skipped.
        idec = None
        if re.search(r'(^/porady|/zive)/', parsed_url.path):
            next_data = self._search_nextjs_data(webpage, playlist_id)
            if '/zive/' in parsed_url.path:
                idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'idec'), get_all=False)
            else:
                idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
                if not idec:
                    # Fall back to the bonus id; the player query then
                    # uses the 'bonus' key instead of 'IDEC'.
                    idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'videobonusDetail', 'bonusId'), get_all=False)
                    if idec:
                        type_ = 'bonus'
            if not idec:
                raise ExtractorError('Failed to find IDEC id')
            iframe_hash = self._download_webpage(
                'https://www.ceskatelevize.cz/v-api/iframe-hash/',
                playlist_id, note='Getting IFRAME hash')
            query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec, }
            # From here on "webpage" is the iframe player page.
            webpage = self._download_webpage(
                'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php',
                playlist_id, note='Downloading player', query=query)

        NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
        if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
            self.raise_geo_restricted(NOT_AVAILABLE_STRING)
        if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen', )):
            raise ExtractorError(
                'no video with IDEC available',
                video_id=idec or playlist_id, expected=True)

        type_ = None
        episode_id = None

        # Preferred source: the JSON object passed to getPlaylistUrl([...]).
        playlist = self._parse_json(
            self._search_regex(
                r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist',
                default='{}'), playlist_id)
        if playlist:
            type_ = playlist.get('type')
            episode_id = playlist.get('id')

        # Fallbacks: pull the individual values out with stricter regexes.
        if not type_:
            type_ = self._html_search_regex(
                r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],',
                webpage, 'type')
        if not episode_id:
            episode_id = self._html_search_regex(
                r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],',
                webpage, 'episode_id')

        data = {
            'playlist[0][type]': type_,
            'playlist[0][id]': episode_id,
            'requestUrl': parsed_url.path,
            'requestSource': 'iVysilani',
        }

        entries = []

        # The playlist is requested twice: once with the default
        # User-Agent and once with a Safari one. Formats from the second
        # pass are merged into the entries built by the first pass.
        # NOTE(review): presumably the server returns different stream
        # URLs depending on the User-Agent - confirm.
        for user_agent in (None, USER_AGENTS['Safari']):
            req = sanitized_Request(
                'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/',
                data=urlencode_postdata(data))

            req.add_header('Content-type', 'application/x-www-form-urlencoded')
            req.add_header('x-addr', '127.0.0.1')
            req.add_header('X-Requested-With', 'XMLHttpRequest')
            if user_agent:
                req.add_header('User-Agent', user_agent)
            req.add_header('Referer', url)

            playlistpage = self._download_json(req, playlist_id, fatal=False)

            if not playlistpage:
                continue

            playlist_url = playlistpage['url']
            if playlist_url == 'error_region':
                raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

            req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
            req.add_header('Referer', url)

            playlist = self._download_json(req, playlist_id, fatal=False)
            if not playlist:
                continue

            playlist = playlist.get('playlist')
            if not isinstance(playlist, list):
                continue

            playlist_len = len(playlist)

            for num, item in enumerate(playlist):
                is_live = item.get('type') == 'LIVE'
                formats = []
                # "or {}" also covers a 'streamUrls' key that is present
                # but null.
                for format_id, stream_url in (item.get('streamUrls') or {}).items():
                    if 'playerType=flash' in stream_url:
                        stream_formats = self._extract_m3u8_formats(
                            stream_url, playlist_id, 'mp4', 'm3u8_native',
                            m3u8_id='hls-%s' % format_id, fatal=False)
                    else:
                        stream_formats = self._extract_mpd_formats(
                            stream_url, playlist_id,
                            mpd_id='dash-%s' % format_id, fatal=False)
                    if 'drmOnly=true' in stream_url:
                        for f in stream_formats:
                            f['has_drm'] = True
                    # See https://github.com/ytdl-org/youtube-dl/issues/12119#issuecomment-280037031
                    if format_id == 'audioDescription':
                        for f in stream_formats:
                            f['source_preference'] = -10
                    formats.extend(stream_formats)

                # Second pass: if the first pass already produced one
                # entry per playlist item, only add the new formats.
                if user_agent and len(entries) == playlist_len:
                    entries[num]['formats'].extend(formats)
                    continue

                item_id = str_or_none(item.get('id') or item['assetId'])
                title = item['title']

                duration = float_or_none(item.get('duration'))
                thumbnail = item.get('previewImageUrl')

                subtitles = {}
                if item.get('type') == 'VOD':
                    subs = item.get('subtitles')
                    if subs:
                        subtitles = self.extract_subtitles(episode_id, subs)

                if playlist_len == 1 or not playlist_title:
                    # Without a page title, use the item's own title
                    # rather than formatting None into the result.
                    final_title = playlist_title or title
                else:
                    final_title = '%s (%s)' % (playlist_title, title)

                entries.append({
                    'id': item_id,
                    'title': final_title,
                    'description': playlist_description if playlist_len == 1 else None,
                    'thumbnail': thumbnail,
                    'duration': duration,
                    'formats': formats,
                    'subtitles': subtitles,
                    'is_live': is_live,
                })

        if len(entries) == 1:
            return entries[0]
        return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)

    def _get_subtitles(self, episode_id, subs):
        """Download the first subtitle track in ``subs`` and convert it.

        ``subs`` is indexed as a sequence of mappings with a ``'url'`` key;
        only ``subs[0]`` is used. The downloaded text is passed through
        ``_fix_subtitles`` and returned as a single ``'cs'`` SRT track.
        """
        original_subtitles = self._download_webpage(
            subs[0]['url'], episode_id, 'Downloading subtitles')
        srt_subs = self._fix_subtitles(original_subtitles)
        return {
            'cs': [{
                'ext': 'srt',
                'data': srt_subs,
            }]
        }

    @staticmethod
    def _fix_subtitles(subtitles):
        """Convert millisecond-based subtitles to SRT.

        Lines of the form ``<index>; <start_ms> <stop_ms>`` are replaced
        by two lines: the index, then an ``HH:MM:SS,mmm --> HH:MM:SS,mmm``
        timecode line. All other lines are passed through unchanged. The
        result is joined with CRLF line endings.
        """

        def _msectotimecode(msec):
            """Format a millisecond count as ``HH:MM:SS,mmm``."""
            components = []
            # Peel off milliseconds, seconds, minutes, then hours
            # (hours are taken modulo 100 to fit the two-digit field).
            for divider in [1000, 60, 60, 100]:
                components.append(msec % divider)
                msec //= divider
            return '{3:02}:{2:02}:{1:02},{0:03}'.format(*components)

        def _fix_subtitle(subtitle):
            """Yield the SRT lines for the given subtitle text."""
            for line in subtitle.splitlines():
                m = re.match(r'^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$', line)
                if m:
                    yield m.group(1)
                    start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:])
                    yield '{0} --> {1}'.format(start, stop)
                else:
                    yield line

        return '\r\n'.join(_fix_subtitle(subtitles))