]>
Commit | Line | Data |
---|---|---|
dcdb292f | 1 | # coding: utf-8 |
8cfb5bbf | 2 | from __future__ import unicode_literals |
3 | ||
4 | import re | |
8cfb5bbf | 5 | |
48246541 | 6 | from .common import InfoExtractor |
1cc79574 | 7 | from ..compat import ( |
3e72f5f1 | 8 | compat_urllib_parse_unquote, |
39201787 | 9 | compat_urllib_parse_urlparse, |
1cc79574 PH |
10 | ) |
11 | from ..utils import ( | |
39201787 | 12 | ExtractorError, |
02ec32a1 | 13 | float_or_none, |
5c2266df | 14 | sanitized_Request, |
6e6bc8da | 15 | urlencode_postdata, |
5cb2d36c | 16 | USER_AGENTS, |
8cfb5bbf | 17 | ) |
18 | ||
19 | ||
class CeskaTelevizeIE(InfoExtractor):
    """Extractor for ceskatelevize.cz ``/porady/`` and ``/ivysilani/`` pages.

    A page is resolved to a playlist: the playlist URL is requested from the
    site's ``get-client-playlist`` AJAX endpoint and every playlist item
    becomes one entry with HLS and/or DASH formats.
    """

    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$'
    _TESTS = [{
        'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
        'info_dict': {
            'id': '61924494877246241',
            'ext': 'mp4',
            'title': 'Hyde Park Civilizace: Život v Grónsku',
            'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 3350,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
        'info_dict': {
            'id': '61924494877028507',
            'ext': 'mp4',
            'title': 'Hyde Park Civilizace: Bonus 01 - En',
            'description': 'English Subtittles',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 81.3,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # live stream
        'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
        'info_dict': {
            'id': 402,
            'ext': 'mp4',
            'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'is_live': True,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': 'Georestricted to Czech Republic',
    }, {
        # video with 18+ caution trailer
        'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
        'info_dict': {
            'id': '215562210900007-bogotart',
            'title': 'Queer: Bogotart',
            'description': 'Alternativní průvodce současným queer světem',
        },
        'playlist': [{
            'info_dict': {
                'id': '61924494876844842',
                'ext': 'mp4',
                'title': 'Queer: Bogotart (Varování 18+)',
                'duration': 10.2,
            },
        }, {
            'info_dict': {
                'id': '61924494877068022',
                'ext': 'mp4',
                'title': 'Queer: Bogotart (Queer)',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 1558.3,
            },
        }],
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Build a playlist result for the given page URL.

        The playlist endpoint is queried twice: once with the default
        User-Agent and once with a Safari User-Agent.  Entries are created on
        the first pass that yields a playlist; when the Safari pass returns a
        playlist of the same length, its formats are merged into the existing
        entries instead of creating duplicates.

        Raises ExtractorError (expected) when the page or the playlist
        endpoint reports that the content is not available in the territory.
        """
        # Normalise to the /ivysilani/ form of the URL before matching.
        url = url.replace('/porady/', '/ivysilani/').replace('/video/', '')

        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')

        webpage = self._download_webpage(url, playlist_id)

        NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
        if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
            raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

        typ = self._html_search_regex(
            r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type')
        episode_id = self._html_search_regex(
            r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id')

        data = {
            'playlist[0][type]': typ,
            'playlist[0][id]': episode_id,
            'requestUrl': compat_urllib_parse_urlparse(url).path,
            'requestSource': 'iVysilani',
        }

        entries = []

        # Page-level metadata depends only on the webpage, so resolve it once
        # up front.  This also guarantees both names are bound for the final
        # playlist_result() call even if every playlist request below fails.
        playlist_title = self._og_search_title(webpage, default=None)
        playlist_description = self._og_search_description(webpage, default=None)

        for user_agent in (None, USER_AGENTS['Safari']):
            req = sanitized_Request(
                'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
                data=urlencode_postdata(data))

            req.add_header('Content-type', 'application/x-www-form-urlencoded')
            req.add_header('x-addr', '127.0.0.1')
            req.add_header('X-Requested-With', 'XMLHttpRequest')
            if user_agent:
                req.add_header('User-Agent', user_agent)
            req.add_header('Referer', url)

            playlistpage = self._download_json(req, playlist_id, fatal=False)

            if not playlistpage:
                continue

            playlist_url = playlistpage['url']
            if playlist_url == 'error_region':
                raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

            req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
            req.add_header('Referer', url)

            playlist = self._download_json(req, playlist_id, fatal=False)
            if not playlist:
                continue

            playlist = playlist.get('playlist')
            if not isinstance(playlist, list):
                continue

            playlist_len = len(playlist)

            for num, item in enumerate(playlist):
                is_live = item.get('type') == 'LIVE'
                formats = []
                for format_id, stream_url in item.get('streamUrls', {}).items():
                    if 'playerType=flash' in stream_url:
                        stream_formats = self._extract_m3u8_formats(
                            stream_url, playlist_id, 'mp4',
                            entry_protocol='m3u8' if is_live else 'm3u8_native',
                            m3u8_id='hls-%s' % format_id, fatal=False)
                    else:
                        stream_formats = self._extract_mpd_formats(
                            stream_url, playlist_id,
                            mpd_id='dash-%s' % format_id, fatal=False)
                    # See https://github.com/rg3/youtube-dl/issues/12119#issuecomment-280037031
                    if format_id == 'audioDescription':
                        for f in stream_formats:
                            f['source_preference'] = -10
                    formats.extend(stream_formats)

                # Second (Safari) pass over a playlist of the same length:
                # only contribute extra formats to the existing entries.
                if user_agent and len(entries) == playlist_len:
                    entries[num]['formats'].extend(formats)
                    continue

                item_id = item.get('id') or item['assetId']
                title = item['title']

                duration = float_or_none(item.get('duration'))
                thumbnail = item.get('previewImageUrl')

                subtitles = {}
                if item.get('type') == 'VOD':
                    subs = item.get('subtitles')
                    if subs:
                        subtitles = self.extract_subtitles(episode_id, subs)

                if playlist_len == 1:
                    final_title = playlist_title or title
                    if is_live:
                        final_title = self._live_title(final_title)
                elif playlist_title:
                    final_title = '%s (%s)' % (playlist_title, title)
                else:
                    # No og:title on the page: use the item title alone
                    # rather than producing a literal "None (...)".
                    final_title = title

                entries.append({
                    'id': item_id,
                    'title': final_title,
                    'description': playlist_description if playlist_len == 1 else None,
                    'thumbnail': thumbnail,
                    'duration': duration,
                    'formats': formats,
                    'subtitles': subtitles,
                    'is_live': is_live,
                })

        for e in entries:
            self._sort_formats(e['formats'])

        return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)

    def _get_subtitles(self, episode_id, subs):
        """Download the first subtitle track in *subs* and return it as SRT.

        Only ``subs[0]['url']`` is used; the result is always filed under the
        ``'cs'`` language key.
        """
        original_subtitles = self._download_webpage(
            subs[0]['url'], episode_id, 'Downloading subtitles')
        srt_subs = self._fix_subtitles(original_subtitles)
        return {
            'cs': [{
                'ext': 'srt',
                'data': srt_subs,
            }]
        }

    @staticmethod
    def _fix_subtitles(subtitles):
        """ Convert millisecond-based subtitles to SRT

        Lines of the form ``<index>; <start_ms> <stop_ms>`` are replaced by
        an index line followed by an ``HH:MM:SS,mmm --> HH:MM:SS,mmm`` line;
        all other lines are passed through unchanged.  Output lines are
        joined with CRLF.
        """

        def _msectotimecode(msec):
            """ Helper utility to convert milliseconds to timecode """
            components = []
            # Peel off milliseconds, seconds, minutes, then hours (mod 100).
            for divider in [1000, 60, 60, 100]:
                components.append(msec % divider)
                msec //= divider
            return '{3:02}:{2:02}:{1:02},{0:03}'.format(*components)

        def _fix_subtitle(subtitle):
            for line in subtitle.splitlines():
                m = re.match(r'^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$', line)
                if m:
                    yield m.group(1)
                    start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:])
                    yield '{0} --> {1}'.format(start, stop)
                else:
                    yield line

        return '\r\n'.join(_fix_subtitle(subtitles))