]>
Commit | Line | Data |
---|---|---|
8cfb5bbf | 1 | import re |
add96eb9 | 2 | import urllib.parse |
8cfb5bbf | 3 | |
48246541 | 4 | from .common import InfoExtractor |
3d2623a8 | 5 | from ..networking import Request |
1cc79574 | 6 | from ..utils import ( |
39201787 | 7 | ExtractorError, |
02ec32a1 | 8 | float_or_none, |
db4678e4 | 9 | str_or_none, |
443b21dc | 10 | traverse_obj, |
6e6bc8da | 11 | urlencode_postdata, |
8cfb5bbf | 12 | ) |
13 | ||
# Alternative User-Agent header values, keyed by browser name.
# CeskaTelevizeIE._real_extract repeats its playlist request with the 'Safari'
# entry after a first request that sets no explicit User-Agent.
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
17 | ||
8cfb5bbf | 18 | |
class CeskaTelevizeIE(InfoExtractor):
    """Extractor for ceskatelevize.cz pages under /ivysilani, /porady and /zive."""

    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
    _TESTS = [{
        'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
        'info_dict': {
            'id': '61924494877028507',
            'ext': 'mp4',
            'title': 'Bonus 01 - En - Hyde Park Civilizace',
            'description': 'English Subtittles',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 81.3,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # live stream
        'url': 'http://www.ceskatelevize.cz/zive/ct1/',
        'info_dict': {
            'id': '102',
            'ext': 'mp4',
            'title': r'ČT1 - živé vysílání online',
            'description': 'Sledujte živé vysílání kanálu ČT1 online. Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.',
            'is_live': True,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # another
        'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
        'only_matching': True,
        'info_dict': {
            'id': '402',
            'ext': 'mp4',
            'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'is_live': True,
        },
        # 'skip': 'Georestricted to Czech Republic',
    }, {
        'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
        'only_matching': True,
    }, {
        # video with 18+ caution trailer
        'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
        'info_dict': {
            'id': '215562210900007-bogotart',
            'title': 'Bogotart - Queer',
            'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti',
        },
        'playlist': [{
            'info_dict': {
                'id': '61924494877311053',
                'ext': 'mp4',
                'title': 'Bogotart - Queer (Varování 18+)',
                'duration': 11.9,
            },
        }, {
            'info_dict': {
                'id': '61924494877068022',
                'ext': 'mp4',
                'title': 'Bogotart - Queer (Queer)',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 1558.3,
            },
        }],
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # iframe embed
        'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
        'only_matching': True,
    }]

    def _extract_item_formats(self, item, playlist_id):
        """Collect the formats for every stream URL of one playlist item.

        URLs containing 'playerType=flash' are parsed as HLS manifests, all
        others as DASH manifests. Formats from URLs containing 'drmOnly=true'
        are flagged with has_drm, and formats of the 'audioDescription'
        stream get a source_preference of -10.
        """
        formats = []
        # `or {}` also covers a 'streamUrls' key that is present but null
        for format_id, stream_url in (item.get('streamUrls') or {}).items():
            if 'playerType=flash' in stream_url:
                stream_formats = self._extract_m3u8_formats(
                    stream_url, playlist_id, 'mp4', 'm3u8_native',
                    m3u8_id=f'hls-{format_id}', fatal=False)
            else:
                stream_formats = self._extract_mpd_formats(
                    stream_url, playlist_id,
                    mpd_id=f'dash-{format_id}', fatal=False)
            if 'drmOnly=true' in stream_url:
                for f in stream_formats:
                    f['has_drm'] = True
            # See https://github.com/ytdl-org/youtube-dl/issues/12119#issuecomment-280037031
            if format_id == 'audioDescription':
                for f in stream_formats:
                    f['source_preference'] = -10
            formats.extend(stream_formats)
        return formats

    def _real_extract(self, url):
        """Extract a single video or a playlist from a ceskatelevize.cz URL.

        Returns the sole entry dict when exactly one item was collected,
        otherwise a playlist result holding all collected entries.

        Raises ExtractorError when no IDEC id can be found, when the player
        page reports that no video is available, or when the playlist
        endpoint answers with 'error_region'. Geo-restricted player pages are
        reported through raise_geo_restricted.
        """
        playlist_id = self._match_id(url)
        webpage, urlh = self._download_webpage_handle(url, playlist_id)
        parsed_url = urllib.parse.urlparse(urlh.url)
        site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
        playlist_title = self._og_search_title(webpage, default=None)
        if site_name and playlist_title:
            # site_name comes from page metadata, so escape it before it
            # becomes part of the pattern that strips the site-name suffix
            playlist_title = re.split(
                rf'\s*[—|]\s*{re.escape(site_name)}', playlist_title, maxsplit=1)[0]
        playlist_description = self._og_search_description(webpage, default=None)
        if playlist_description:
            playlist_description = playlist_description.replace('\xa0', ' ')

        # idec stays None for URLs that skip the branch below; it is still
        # read by the "no video" error path further down
        idec = None
        type_ = 'IDEC'
        if re.search(r'(^/porady|/zive)/', parsed_url.path):
            next_data = self._search_nextjs_data(webpage, playlist_id)
            if '/zive/' in parsed_url.path:
                idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'idec'), get_all=False)
            else:
                idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
                if not idec:
                    idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'videobonusDetail', 'bonusId'), get_all=False)
                    if idec:
                        type_ = 'bonus'
            if not idec:
                raise ExtractorError('Failed to find IDEC id')
            iframe_hash = self._download_webpage(
                'https://www.ceskatelevize.cz/v-api/iframe-hash/',
                playlist_id, note='Getting IFRAME hash')
            query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec}
            # From here on `webpage` is the embedded player page
            webpage = self._download_webpage(
                'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php',
                playlist_id, note='Downloading player', query=query)

        NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
        if f'{NOT_AVAILABLE_STRING}</p>' in webpage:
            self.raise_geo_restricted(NOT_AVAILABLE_STRING)
        if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen')):
            raise ExtractorError(
                'no video with IDEC available', video_id=idec or playlist_id, expected=True)

        type_ = None
        episode_id = None

        playlist = self._parse_json(
            self._search_regex(
                r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist',
                default='{}'), playlist_id)
        if playlist:
            type_ = playlist.get('type')
            episode_id = playlist.get('id')

        # Fall back to field-specific patterns when the JSON object above
        # did not yield both values
        if not type_:
            type_ = self._html_search_regex(
                r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],',
                webpage, 'type')
        if not episode_id:
            episode_id = self._html_search_regex(
                r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],',
                webpage, 'episode_id')

        data = {
            'playlist[0][type]': type_,
            'playlist[0][id]': episode_id,
            'requestUrl': parsed_url.path,
            'requestSource': 'iVysilani',
        }

        entries = []

        # The playlist is requested twice: first without an explicit
        # User-Agent, then with the Safari one. Formats from the second pass
        # are merged into the entries built by the first pass.
        for user_agent in (None, USER_AGENTS['Safari']):
            req = Request(
                'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/',
                data=urlencode_postdata(data))

            req.headers['Content-type'] = 'application/x-www-form-urlencoded'
            req.headers['x-addr'] = '127.0.0.1'
            req.headers['X-Requested-With'] = 'XMLHttpRequest'
            if user_agent:
                req.headers['User-Agent'] = user_agent
            req.headers['Referer'] = url

            playlistpage = self._download_json(req, playlist_id, fatal=False)

            if not playlistpage:
                continue

            playlist_url = playlistpage['url']
            if playlist_url == 'error_region':
                raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

            req = Request(urllib.parse.unquote(playlist_url))
            req.headers['Referer'] = url

            playlist = self._download_json(req, playlist_id, fatal=False)
            if not playlist:
                continue

            playlist = playlist.get('playlist')
            if not isinstance(playlist, list):
                continue

            playlist_len = len(playlist)

            for num, item in enumerate(playlist):
                is_live = item.get('type') == 'LIVE'
                formats = self._extract_item_formats(item, playlist_id)

                # Second pass with a matching item count: only add the newly
                # found formats to the entry created by the first pass
                if user_agent and len(entries) == playlist_len:
                    entries[num]['formats'].extend(formats)
                    continue

                item_id = str_or_none(item.get('id') or item['assetId'])
                title = item['title']

                duration = float_or_none(item.get('duration'))
                thumbnail = item.get('previewImageUrl')

                subtitles = {}
                if item.get('type') == 'VOD':
                    subs = item.get('subtitles')
                    if subs:
                        subtitles = self.extract_subtitles(episode_id, subs)

                if playlist_len == 1:
                    final_title = playlist_title or title
                elif playlist_title:
                    final_title = f'{playlist_title} ({title})'
                else:
                    # No page title: avoid producing "None (title)"
                    final_title = title

                entries.append({
                    'id': item_id,
                    'title': final_title,
                    'description': playlist_description if playlist_len == 1 else None,
                    'thumbnail': thumbnail,
                    'duration': duration,
                    'formats': formats,
                    'subtitles': subtitles,
                    'is_live': is_live,
                })

        if len(entries) == 1:
            return entries[0]
        return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)

    def _get_subtitles(self, episode_id, subs):
        """Download the first subtitle track in `subs` and return it as SRT.

        The result maps the language code 'cs' to a single entry holding the
        converted subtitle text in its 'data' field.
        NOTE(review): presumably invoked through extract_subtitles() on the
        base class - confirm against InfoExtractor.
        """
        original_subtitles = self._download_webpage(
            subs[0]['url'], episode_id, 'Downloading subtitles')
        srt_subs = self._fix_subtitles(original_subtitles)
        return {
            'cs': [{
                'ext': 'srt',
                'data': srt_subs,
            }],
        }

    @staticmethod
    def _fix_subtitles(subtitles):
        """ Convert millisecond-based subtitles to SRT

        Lines of the form "<index>; <start> <stop>" (both times in
        milliseconds) become the index line followed by an SRT timing line
        "HH:MM:SS,mmm --> HH:MM:SS,mmm". All other lines pass through
        unchanged. Output lines are joined with CRLF.
        """

        def _msectotimecode(msec):
            """ Helper utility to convert milliseconds to timecode """
            components = []
            # Peel off milliseconds, seconds, minutes, then hours (mod 100)
            for divider in [1000, 60, 60, 100]:
                components.append(msec % divider)
                msec //= divider
            return '{3:02}:{2:02}:{1:02},{0:03}'.format(*components)

        def _fix_subtitle(subtitle):
            for line in subtitle.splitlines():
                m = re.match(r'^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$', line)
                if m:
                    yield m.group(1)
                    start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:])
                    yield f'{start} --> {stop}'
                else:
                    yield line

        return '\r\n'.join(_fix_subtitle(subtitles))