]>
Commit | Line | Data |
---|---|---|
8cfb5bbf | 1 | import re |
8cfb5bbf | 2 | |
48246541 | 3 | from .common import InfoExtractor |
1cc79574 | 4 | from ..compat import ( |
3e72f5f1 | 5 | compat_urllib_parse_unquote, |
39201787 | 6 | compat_urllib_parse_urlparse, |
1cc79574 PH |
7 | ) |
8 | from ..utils import ( | |
39201787 | 9 | ExtractorError, |
02ec32a1 | 10 | float_or_none, |
5c2266df | 11 | sanitized_Request, |
db4678e4 | 12 | str_or_none, |
443b21dc | 13 | traverse_obj, |
6e6bc8da | 14 | urlencode_postdata, |
5cb2d36c | 15 | USER_AGENTS, |
8cfb5bbf | 16 | ) |
17 | ||
18 | ||
class CeskaTelevizeIE(InfoExtractor):
    """Extractor for ceskatelevize.cz ``ivysilani``, ``porady`` and ``zive`` pages."""

    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
    _TESTS = [{
        'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
        'info_dict': {
            'id': '61924494877028507',
            'ext': 'mp4',
            'title': 'Bonus 01 - En - Hyde Park Civilizace',
            'description': 'English Subtittles',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 81.3,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # live stream
        'url': 'http://www.ceskatelevize.cz/zive/ct1/',
        'info_dict': {
            'id': '102',
            'ext': 'mp4',
            'title': r'ČT1 - živé vysílání online',
            'description': 'Sledujte živé vysílání kanálu ČT1 online. Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.',
            'is_live': True,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # another
        'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
        'only_matching': True,
        'info_dict': {
            'id': 402,
            'ext': 'mp4',
            'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'is_live': True,
        },
        # 'skip': 'Georestricted to Czech Republic',
    }, {
        'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
        'only_matching': True,
    }, {
        # video with 18+ caution trailer
        'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
        'info_dict': {
            'id': '215562210900007-bogotart',
            'title': 'Bogotart - Queer',
            'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti',
        },
        'playlist': [{
            'info_dict': {
                'id': '61924494877311053',
                'ext': 'mp4',
                'title': 'Bogotart - Queer (Varování 18+)',
                'duration': 11.9,
            },
        }, {
            'info_dict': {
                'id': '61924494877068022',
                'ext': 'mp4',
                'title': 'Bogotart - Queer (Queer)',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 1558.3,
            },
        }],
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # iframe embed
        'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract a single video or a playlist from a ceskatelevize.cz page.

        For ``/porady/`` and ``/zive/`` pages the IDEC (or bonus) id is read
        from the page's Next.js data and the iframe player page is downloaded
        in place of the original page. The player page's ``getPlaylistUrl``
        call supplies the type/id posted to the client-playlist endpoint.

        Returns the single entry dict when exactly one entry was collected,
        otherwise a playlist result.

        Raises ExtractorError when no IDEC id can be found, when the player
        reports an invalid/unknown IDEC, or when the content is region-locked.
        """
        playlist_id = self._match_id(url)
        webpage, urlh = self._download_webpage_handle(url, playlist_id)
        parsed_url = compat_urllib_parse_urlparse(urlh.geturl())
        site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
        playlist_title = self._og_search_title(webpage, default=None)
        if site_name and playlist_title:
            # Strip the trailing " — <site name>" / " | <site name>" suffix.
            # site_name comes from the page, so it is escaped before being
            # used inside the pattern.
            playlist_title = re.split(
                r'\s*[—|]\s*%s' % (re.escape(site_name), ), playlist_title, maxsplit=1)[0]
        playlist_description = self._og_search_description(webpage, default=None)
        if playlist_description:
            playlist_description = playlist_description.replace('\xa0', ' ')

        # idec stays None for URLs that skip the branch below (e.g. direct
        # /ivysilani/ pages); it is still referenced in the error path later.
        idec = None
        type_ = 'IDEC'
        if re.search(r'(^/porady|/zive)/', parsed_url.path):
            next_data = self._search_nextjs_data(webpage, playlist_id)
            if '/zive/' in parsed_url.path:
                idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'idec'), get_all=False)
            else:
                idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
                if not idec:
                    idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'videobonusDetail', 'bonusId'), get_all=False)
                    if idec:
                        type_ = 'bonus'
            if not idec:
                raise ExtractorError('Failed to find IDEC id')
            iframe_hash = self._download_webpage(
                'https://www.ceskatelevize.cz/v-api/iframe-hash/',
                playlist_id, note='Getting IFRAME hash')
            query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec, }
            # From here on `webpage` is the iframe player page.
            webpage = self._download_webpage(
                'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php',
                playlist_id, note='Downloading player', query=query)

        NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
        if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
            self.raise_geo_restricted(NOT_AVAILABLE_STRING)
        if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen', )):
            raise ExtractorError('no video with IDEC available', video_id=idec, expected=True)

        type_ = None
        episode_id = None

        playlist = self._parse_json(
            self._search_regex(
                r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist',
                default='{}'), playlist_id)
        if playlist:
            type_ = playlist.get('type')
            episode_id = playlist.get('id')

        # Fall back to stricter patterns when the JSON object lacked a field.
        if not type_:
            type_ = self._html_search_regex(
                r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],',
                webpage, 'type')
        if not episode_id:
            episode_id = self._html_search_regex(
                r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],',
                webpage, 'episode_id')

        data = {
            'playlist[0][type]': type_,
            'playlist[0][id]': episode_id,
            'requestUrl': parsed_url.path,
            'requestSource': 'iVysilani',
        }

        entries = []

        # The playlist is requested twice: with the default User-Agent and
        # with a Safari one. Formats found in the second pass are merged into
        # the entries created by the first pass when the item counts match.
        for user_agent in (None, USER_AGENTS['Safari']):
            req = sanitized_Request(
                'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/',
                data=urlencode_postdata(data))

            req.add_header('Content-type', 'application/x-www-form-urlencoded')
            req.add_header('x-addr', '127.0.0.1')
            req.add_header('X-Requested-With', 'XMLHttpRequest')
            if user_agent:
                req.add_header('User-Agent', user_agent)
            req.add_header('Referer', url)

            playlistpage = self._download_json(req, playlist_id, fatal=False)

            if not playlistpage:
                continue

            # A response without 'url' is treated like any other failed
            # attempt instead of aborting with a KeyError.
            playlist_url = playlistpage.get('url')
            if not playlist_url:
                continue
            if playlist_url == 'error_region':
                raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

            req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
            req.add_header('Referer', url)

            playlist = self._download_json(req, playlist_id, fatal=False)
            if not playlist:
                continue

            playlist = playlist.get('playlist')
            if not isinstance(playlist, list):
                continue

            playlist_len = len(playlist)

            for num, item in enumerate(playlist):
                is_live = item.get('type') == 'LIVE'
                formats = self._extract_item_formats(item, playlist_id)

                if user_agent and len(entries) == playlist_len:
                    entries[num]['formats'].extend(formats)
                    continue

                item_id = str_or_none(item.get('id') or item['assetId'])
                title = item['title']

                duration = float_or_none(item.get('duration'))
                thumbnail = item.get('previewImageUrl')

                subtitles = {}
                if item.get('type') == 'VOD':
                    subs = item.get('subtitles')
                    if subs:
                        subtitles = self.extract_subtitles(episode_id, subs)

                if playlist_len == 1:
                    final_title = playlist_title or title
                elif playlist_title:
                    final_title = '%s (%s)' % (playlist_title, title)
                else:
                    # No page title available: avoid producing "None (...)".
                    final_title = title

                entries.append({
                    'id': item_id,
                    'title': final_title,
                    'description': playlist_description if playlist_len == 1 else None,
                    'thumbnail': thumbnail,
                    'duration': duration,
                    'formats': formats,
                    'subtitles': subtitles,
                    'is_live': is_live,
                })

        if len(entries) == 1:
            return entries[0]
        return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)

    def _extract_item_formats(self, item, playlist_id):
        """Return the list of formats for every stream URL of a playlist item."""
        formats = []
        # 'streamUrls' may be missing or null; both are treated as "no streams".
        for format_id, stream_url in (item.get('streamUrls') or {}).items():
            if 'playerType=flash' in stream_url:
                stream_formats = self._extract_m3u8_formats(
                    stream_url, playlist_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls-%s' % format_id, fatal=False)
            else:
                stream_formats = self._extract_mpd_formats(
                    stream_url, playlist_id,
                    mpd_id='dash-%s' % format_id, fatal=False)
            if 'drmOnly=true' in stream_url:
                for f in stream_formats:
                    f['has_drm'] = True
            # See https://github.com/ytdl-org/youtube-dl/issues/12119#issuecomment-280037031
            if format_id == 'audioDescription':
                for f in stream_formats:
                    f['source_preference'] = -10
            formats.extend(stream_formats)
        return formats

    def _get_subtitles(self, episode_id, subs):
        """Download the first subtitle track in *subs* and return it as Czech SRT.

        *subs* is expected to be a non-empty sequence whose first element has
        a 'url' key; the downloaded text is converted by _fix_subtitles().
        """
        original_subtitles = self._download_webpage(
            subs[0]['url'], episode_id, 'Downloading subtitles')
        srt_subs = self._fix_subtitles(original_subtitles)
        return {
            'cs': [{
                'ext': 'srt',
                'data': srt_subs,
            }]
        }

    @staticmethod
    def _fix_subtitles(subtitles):
        """ Convert millisecond-based subtitles to SRT """

        def _msectotimecode(msec):
            """ Helper utility to convert milliseconds to timecode """
            components = []
            # Peel off milliseconds, seconds, minutes, then hours (mod 100).
            for divider in [1000, 60, 60, 100]:
                components.append(msec % divider)
                msec //= divider
            return '{3:02}:{2:02}:{1:02},{0:03}'.format(*components)

        def _fix_subtitle(subtitle):
            for line in subtitle.splitlines():
                # Cue header lines look like "<index>; <start_ms> <stop_ms>".
                m = re.match(r'^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$', line)
                if m:
                    yield m.group(1)
                    start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:])
                    yield '{0} --> {1}'.format(start, stop)
                else:
                    yield line

        return '\r\n'.join(_fix_subtitle(subtitles))