]>
Commit | Line | Data |
---|---|---|
dcdb292f | 1 | # coding: utf-8 |
8cfb5bbf | 2 | from __future__ import unicode_literals |
3 | ||
4 | import re | |
8cfb5bbf | 5 | |
48246541 | 6 | from .common import InfoExtractor |
1cc79574 | 7 | from ..compat import ( |
3e72f5f1 | 8 | compat_urllib_parse_unquote, |
39201787 | 9 | compat_urllib_parse_urlparse, |
1cc79574 PH |
10 | ) |
11 | from ..utils import ( | |
39201787 | 12 | ExtractorError, |
02ec32a1 | 13 | float_or_none, |
5c2266df | 14 | sanitized_Request, |
443b21dc | 15 | traverse_obj, |
6e6bc8da | 16 | urlencode_postdata, |
5cb2d36c | 17 | USER_AGENTS, |
8cfb5bbf | 18 | ) |
19 | ||
20 | ||
class CeskaTelevizeIE(InfoExtractor):
    """Extractor for ``ceskatelevize.cz`` pages under ``/ivysilani/`` and ``/porady/``.

    A page is resolved to a playlist of one or more items; every item becomes
    one entry of the returned playlist result.
    """

    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
    _TESTS = [{
        'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
        'info_dict': {
            'id': '61924494877028507',
            'ext': 'mp4',
            'title': 'Hyde Park Civilizace: Bonus 01 - En',
            'description': 'English Subtittles',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 81.3,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # live stream
        'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
        'info_dict': {
            'id': 402,
            'ext': 'mp4',
            'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'is_live': True,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': 'Georestricted to Czech Republic',
    }, {
        'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
        'only_matching': True,
    }, {
        # video with 18+ caution trailer
        'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
        'info_dict': {
            'id': '215562210900007-bogotart',
            'title': 'Queer: Bogotart',
            'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti. Připravil Peter Serge Butko',
        },
        'playlist': [{
            'info_dict': {
                'id': '61924494877311053',
                'ext': 'mp4',
                'title': 'Queer: Bogotart (Varování 18+)',
                'duration': 11.9,
            },
        }, {
            'info_dict': {
                'id': '61924494877068022',
                'ext': 'mp4',
                'title': 'Queer: Bogotart (Queer)',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 1558.3,
            },
        }],
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # iframe embed
        'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
        'only_matching': True,
    }]

    def _extract_item_formats(self, item, playlist_id):
        """Collect the formats of every stream URL listed in a playlist item.

        URLs containing ``playerType=flash`` are parsed as HLS manifests, all
        other URLs as DASH manifests. Both downloads are non-fatal, so a
        failing stream only contributes no formats.
        """
        formats = []
        # `streamUrls` may be missing or null; treat both as "no streams".
        for format_id, stream_url in (item.get('streamUrls') or {}).items():
            stream_url = stream_url.replace('https://', 'http://')
            if 'playerType=flash' in stream_url:
                stream_formats = self._extract_m3u8_formats(
                    stream_url, playlist_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls-%s' % format_id, fatal=False)
            else:
                stream_formats = self._extract_mpd_formats(
                    stream_url, playlist_id,
                    mpd_id='dash-%s' % format_id, fatal=False)
            if 'drmOnly=true' in stream_url:
                for f in stream_formats:
                    f['has_drm'] = True
            # See https://github.com/ytdl-org/youtube-dl/issues/12119#issuecomment-280037031
            if format_id == 'audioDescription':
                for f in stream_formats:
                    f['source_preference'] = -10
            formats.extend(stream_formats)
        return formats

    def _real_extract(self, url):
        """Resolve *url* into a playlist result.

        Raises ExtractorError when the IDEC id of a ``/porady/`` page cannot be
        found, or (as an expected error) when the site reports that the
        content is not available in the requesting territory.
        """
        playlist_id = self._match_id(url)
        parsed_url = compat_urllib_parse_urlparse(url)
        webpage = self._download_webpage(url, playlist_id)

        site_name = self._og_search_property('site_name', webpage, fatal=False, default=None)
        playlist_title = self._og_search_title(webpage, default=None)
        if site_name and playlist_title:
            # Drop the " — <site name>" decoration from the page title.
            playlist_title = playlist_title.replace(f' — {site_name}', '', 1)
        playlist_description = self._og_search_description(webpage, default=None)
        if playlist_description:
            playlist_description = playlist_description.replace('\xa0', ' ')

        if parsed_url.path.startswith('/porady/'):
            # These pages do not carry the player call themselves; look up the
            # IDEC id in the Next.js data and fetch the iframe player page.
            next_data = self._search_nextjs_data(webpage, playlist_id)
            idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
            if not idec:
                raise ExtractorError('Failed to find IDEC id')
            iframe_hash = self._download_webpage('https://www.ceskatelevize.cz/v-api/iframe-hash/', playlist_id)
            webpage = self._download_webpage('https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', playlist_id,
                                             query={'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', 'IDEC': idec})

        NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
        if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
            raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

        type_ = None
        episode_id = None

        # First try to parse the whole first argument object of getPlaylistUrl().
        playlist = self._parse_json(
            self._search_regex(
                r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist',
                default='{}'), playlist_id)
        if playlist:
            type_ = playlist.get('type')
            episode_id = playlist.get('id')

        # Fall back to stricter regexes for whichever value is still missing.
        if not type_:
            type_ = self._html_search_regex(
                r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],',
                webpage, 'type')
        if not episode_id:
            episode_id = self._html_search_regex(
                r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],',
                webpage, 'episode_id')

        data = {
            'playlist[0][type]': type_,
            'playlist[0][id]': episode_id,
            'requestUrl': parsed_url.path,
            'requestSource': 'iVysilani',
        }

        entries = []

        # The playlist is requested twice: with the default User-Agent and
        # with a Safari one. Formats found on the second pass are merged into
        # the entries created on the first pass.
        for user_agent in (None, USER_AGENTS['Safari']):
            req = sanitized_Request(
                'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/',
                data=urlencode_postdata(data))

            req.add_header('Content-type', 'application/x-www-form-urlencoded')
            req.add_header('x-addr', '127.0.0.1')
            req.add_header('X-Requested-With', 'XMLHttpRequest')
            if user_agent:
                req.add_header('User-Agent', user_agent)
            req.add_header('Referer', url)

            playlistpage = self._download_json(req, playlist_id, fatal=False)

            if not playlistpage:
                continue

            # A response without a URL is treated like a failed download
            # instead of aborting the extraction with a KeyError.
            playlist_url = playlistpage.get('url')
            if not playlist_url:
                continue
            if playlist_url == 'error_region':
                raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

            req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
            req.add_header('Referer', url)

            playlist = self._download_json(req, playlist_id, fatal=False)
            if not playlist:
                continue

            playlist = playlist.get('playlist')
            if not isinstance(playlist, list):
                continue

            playlist_len = len(playlist)

            for num, item in enumerate(playlist):
                is_live = item.get('type') == 'LIVE'
                formats = self._extract_item_formats(item, playlist_id)

                # Second pass over an already known playlist: only add formats.
                if user_agent and len(entries) == playlist_len:
                    entries[num]['formats'].extend(formats)
                    continue

                item_id = item.get('id') or item['assetId']
                title = item['title']

                duration = float_or_none(item.get('duration'))
                thumbnail = item.get('previewImageUrl')

                subtitles = {}
                if item.get('type') == 'VOD':
                    subs = item.get('subtitles')
                    if subs:
                        subtitles = self.extract_subtitles(episode_id, subs)

                if playlist_title and playlist_len > 1:
                    final_title = '%s (%s)' % (playlist_title, title)
                else:
                    # Single item, or no page title to prefix the item with
                    # (avoids titles such as "None (...)").
                    final_title = playlist_title or title

                entries.append({
                    'id': item_id,
                    'title': final_title,
                    'description': playlist_description if playlist_len == 1 else None,
                    'thumbnail': thumbnail,
                    'duration': duration,
                    'formats': formats,
                    'subtitles': subtitles,
                    'is_live': is_live,
                })

        for e in entries:
            self._sort_formats(e['formats'])

        return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)

    def _get_subtitles(self, episode_id, subs):
        """Download the first subtitle track in *subs* and return it as SRT.

        Only ``subs[0]['url']`` is used; the converted text is returned under
        the ``cs`` language key.
        """
        original_subtitles = self._download_webpage(
            subs[0]['url'], episode_id, 'Downloading subtitles')
        srt_subs = self._fix_subtitles(original_subtitles)
        return {
            'cs': [{
                'ext': 'srt',
                'data': srt_subs,
            }]
        }

    @staticmethod
    def _fix_subtitles(subtitles):
        """ Convert millisecond-based subtitles to SRT

        Lines of the form ``<index>; <start_ms> <stop_ms>`` are replaced by an
        index line followed by an ``HH:MM:SS,mmm --> HH:MM:SS,mmm`` line; every
        other line is passed through. Lines are joined with CRLF.
        """

        def _msectotimecode(msec):
            """ Helper utility to convert milliseconds to timecode """
            components = []
            # Peel off milliseconds, seconds, minutes and hours (hours wrap
            # at 100 so they always fit the two-digit field).
            for divider in (1000, 60, 60, 100):
                msec, remainder = divmod(msec, divider)
                components.append(remainder)
            return '{3:02}:{2:02}:{1:02},{0:03}'.format(*components)

        def _fix_subtitle(subtitle):
            for line in subtitle.splitlines():
                m = re.match(r'^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$', line)
                if not m:
                    yield line
                    continue
                index, start_ms, stop_ms = m.groups()
                yield index
                yield '{0} --> {1}'.format(
                    _msectotimecode(int(start_ms)), _msectotimecode(int(stop_ms)))

        return '\r\n'.join(_fix_subtitle(subtitles))