]>
Commit | Line | Data |
---|---|---|
dcdb292f | 1 | # coding: utf-8 |
8cfb5bbf | 2 | from __future__ import unicode_literals |
3 | ||
4 | import re | |
8cfb5bbf | 5 | |
48246541 | 6 | from .common import InfoExtractor |
1cc79574 | 7 | from ..compat import ( |
3e72f5f1 | 8 | compat_urllib_parse_unquote, |
39201787 | 9 | compat_urllib_parse_urlparse, |
1cc79574 PH |
10 | ) |
11 | from ..utils import ( | |
39201787 | 12 | ExtractorError, |
02ec32a1 | 13 | float_or_none, |
5c2266df | 14 | sanitized_Request, |
28b674ca | 15 | unescapeHTML, |
9a054fcb | 16 | update_url_query, |
6e6bc8da | 17 | urlencode_postdata, |
5cb2d36c | 18 | USER_AGENTS, |
8cfb5bbf | 19 | ) |
20 | ||
21 | ||
class CeskaTelevizeIE(InfoExtractor):
    """Extractor for ceskatelevize.cz ``/ivysilani/`` pages.

    The page is scraped for the ``getPlaylistUrl([...])`` call to learn the
    playlist type and episode id; those are posted to the
    ``get-client-playlist`` AJAX endpoint, which answers with the URL of the
    actual playlist JSON.  Every item of that playlist becomes one entry of
    the returned playlist result.
    """

    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
    _TESTS = [{
        'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
        'info_dict': {
            'id': '61924494877246241',
            'ext': 'mp4',
            'title': 'Hyde Park Civilizace: Život v Grónsku',
            'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 3350,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
        'info_dict': {
            'id': '61924494877028507',
            'ext': 'mp4',
            'title': 'Hyde Park Civilizace: Bonus 01 - En',
            'description': 'English Subtittles',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 81.3,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # live stream
        'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
        'info_dict': {
            'id': 402,
            'ext': 'mp4',
            'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'is_live': True,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': 'Georestricted to Czech Republic',
    }, {
        'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a playlist result for the iVysilani page at *url*.

        Raises ExtractorError (expected) when the page or the playlist
        endpoint reports that the content is not available in the caller's
        territory.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
        if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
            raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

        type_ = None
        episode_id = None

        # Preferred path: parse the first object passed to getPlaylistUrl()
        # as JSON; an empty dict is used when the call is not found.
        playlist = self._parse_json(
            self._search_regex(
                r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist',
                default='{}'), playlist_id)
        if playlist:
            type_ = playlist.get('type')
            episode_id = playlist.get('id')

        # Fallback path: pull the two fields out with stricter regexes
        # (these are fatal when they do not match).
        if not type_:
            type_ = self._html_search_regex(
                r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],',
                webpage, 'type')
        if not episode_id:
            episode_id = self._html_search_regex(
                r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],',
                webpage, 'episode_id')

        data = {
            'playlist[0][type]': type_,
            'playlist[0][id]': episode_id,
            'requestUrl': compat_urllib_parse_urlparse(url).path,
            'requestSource': 'iVysilani',
        }

        # Page-level metadata does not depend on the request loop below, so
        # it is resolved once up front.  This also guarantees both names are
        # bound for the final playlist_result() call even when every playlist
        # request fails and the loop body never gets past its early exits.
        playlist_title = self._og_search_title(webpage, default=None)
        playlist_description = self._og_search_description(webpage, default=None)

        entries = []

        # The playlist is requested twice: once with the default User-Agent
        # and once with a Safari one.  Formats found on the second pass are
        # merged into the entries created by the first pass.
        for user_agent in (None, USER_AGENTS['Safari']):
            req = sanitized_Request(
                'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
                data=urlencode_postdata(data))

            req.add_header('Content-type', 'application/x-www-form-urlencoded')
            req.add_header('x-addr', '127.0.0.1')
            req.add_header('X-Requested-With', 'XMLHttpRequest')
            if user_agent:
                req.add_header('User-Agent', user_agent)
            req.add_header('Referer', url)

            playlistpage = self._download_json(req, playlist_id, fatal=False)

            # The download is non-fatal, so tolerate a missing or oddly
            # shaped answer instead of crashing on it.
            if not isinstance(playlistpage, dict):
                continue

            playlist_url = playlistpage.get('url')
            if not playlist_url:
                continue
            if playlist_url == 'error_region':
                raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

            req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
            req.add_header('Referer', url)

            playlist_json = self._download_json(req, playlist_id, fatal=False)
            if not isinstance(playlist_json, dict):
                continue

            playlist = playlist_json.get('playlist')
            if not isinstance(playlist, list):
                continue

            playlist_len = len(playlist)

            for num, item in enumerate(playlist):
                is_live = item.get('type') == 'LIVE'
                formats = []
                # 'streamUrls' may be absent or null; treat both as empty.
                for format_id, stream_url in (item.get('streamUrls') or {}).items():
                    if not stream_url or 'drmOnly=true' in stream_url:
                        continue
                    if 'playerType=flash' in stream_url:
                        stream_formats = self._extract_m3u8_formats(
                            stream_url, playlist_id, 'mp4', 'm3u8_native',
                            m3u8_id='hls-%s' % format_id, fatal=False)
                    else:
                        stream_formats = self._extract_mpd_formats(
                            stream_url, playlist_id,
                            mpd_id='dash-%s' % format_id, fatal=False)
                    # See https://github.com/ytdl-org/youtube-dl/issues/12119#issuecomment-280037031
                    if format_id == 'audioDescription':
                        for f in stream_formats:
                            f['source_preference'] = -10
                    formats.extend(stream_formats)

                # Second pass: only add formats to the entry already built
                # for this position, provided both passes agree on length.
                if user_agent and len(entries) == playlist_len:
                    entries[num]['formats'].extend(formats)
                    continue

                item_id = item.get('id') or item['assetId']
                title = item['title']

                duration = float_or_none(item.get('duration'))
                thumbnail = item.get('previewImageUrl')

                subtitles = {}
                if item.get('type') == 'VOD':
                    subs = item.get('subtitles')
                    if subs:
                        subtitles = self.extract_subtitles(episode_id, subs)

                if playlist_len == 1:
                    final_title = playlist_title or title
                    if is_live:
                        final_title = self._live_title(final_title)
                elif playlist_title:
                    final_title = '%s (%s)' % (playlist_title, title)
                else:
                    # No og:title on the page: avoid emitting "None (...)".
                    final_title = title

                entries.append({
                    'id': item_id,
                    'title': final_title,
                    'description': playlist_description if playlist_len == 1 else None,
                    'thumbnail': thumbnail,
                    'duration': duration,
                    'formats': formats,
                    'subtitles': subtitles,
                    'is_live': is_live,
                })

        for e in entries:
            self._sort_formats(e['formats'])

        return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)

    def _get_subtitles(self, episode_id, subs):
        """Download the first subtitle track in *subs* and return it as SRT.

        Only ``subs[0]['url']`` is used; the converted text is returned under
        the 'cs' language key with the data embedded inline.
        """
        original_subtitles = self._download_webpage(
            subs[0]['url'], episode_id, 'Downloading subtitles')
        srt_subs = self._fix_subtitles(original_subtitles)
        return {
            'cs': [{
                'ext': 'srt',
                'data': srt_subs,
            }]
        }

    @staticmethod
    def _fix_subtitles(subtitles):
        """ Convert millisecond-based subtitles to SRT """

        def _msectotimecode(msec):
            """ Helper utility to convert milliseconds to timecode """
            components = []
            # Peel off milliseconds, seconds, minutes and finally hours
            # (the hour field is taken modulo 100).
            for divider in [1000, 60, 60, 100]:
                components.append(msec % divider)
                msec //= divider
            return '{3:02}:{2:02}:{1:02},{0:03}'.format(*components)

        def _fix_subtitle(subtitle):
            for line in subtitle.splitlines():
                # Cue header lines look like "<index>; <start_ms> <stop_ms>";
                # they are split into an SRT index line and a timing line.
                m = re.match(r'^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$', line)
                if m:
                    yield m.group(1)
                    start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:])
                    yield '{0} --> {1}'.format(start, stop)
                else:
                    # Text and blank lines pass through unchanged.
                    yield line

        return '\r\n'.join(_fix_subtitle(subtitles))
28b674ca S |
239 | |
240 | ||
class CeskaTelevizePoradyIE(InfoExtractor):
    """Extractor for ceskatelevize.cz ``/porady/`` pages.

    These pages only embed a player; the player URL is located in the page
    and extraction is delegated to CeskaTelevizeIE.
    """

    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
    _TESTS = [{
        # video with 18+ caution trailer
        'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
        'info_dict': {
            'id': '215562210900007-bogotart',
            'title': 'Queer: Bogotart',
            'description': 'Alternativní průvodce současným queer světem',
        },
        'playlist': [{
            'info_dict': {
                'id': '61924494876844842',
                'ext': 'mp4',
                'title': 'Queer: Bogotart (Varování 18+)',
                'duration': 10.2,
            },
        }, {
            'info_dict': {
                'id': '61924494877068022',
                'ext': 'mp4',
                'title': 'Queer: Bogotart (Queer)',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 1558.3,
            },
        }],
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # iframe embed
        'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Locate the embedded player URL and hand it to CeskaTelevizeIE.

        The URL is taken from a ``<span data-url=...>`` attribute or, failing
        that, from an ``<iframe src=...>`` pointing at iFramePlayer.php.
        ``autoStart=true`` is added to its query string before delegation.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        player_url = unescapeHTML(self._search_regex(
            (r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
             r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'),
            webpage, 'iframe player url', group='url'))

        # The iframe pattern accepts protocol-relative ("//host/...") URLs,
        # but CeskaTelevizeIE._VALID_URL requires an explicit http(s) scheme,
        # so give such URLs a scheme before delegating.
        player_url = self._proto_relative_url(player_url)

        data_url = update_url_query(player_url, query={
            'autoStart': 'true',
        })

        return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())