]>
Commit | Line | Data |
---|---|---|
dcdb292f | 1 | # coding: utf-8 |
8cfb5bbf | 2 | from __future__ import unicode_literals |
3 | ||
4 | import re | |
8cfb5bbf | 5 | |
48246541 | 6 | from .common import InfoExtractor |
1cc79574 | 7 | from ..compat import ( |
3e72f5f1 | 8 | compat_urllib_parse_unquote, |
39201787 | 9 | compat_urllib_parse_urlparse, |
1cc79574 PH |
10 | ) |
11 | from ..utils import ( | |
39201787 | 12 | ExtractorError, |
02ec32a1 | 13 | float_or_none, |
5c2266df | 14 | sanitized_Request, |
6e6bc8da | 15 | urlencode_postdata, |
8cfb5bbf | 16 | ) |
17 | ||
18 | ||
class CeskaTelevizeIE(InfoExtractor):
    """Extractor for ceskatelevize.cz ``porady`` and ``ivysilani`` pages."""

    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$'
    # NOTE: the 're:' patterns below are raw strings so that '\.' and '\d'
    # are not treated as (invalid) string escape sequences.
    _TESTS = [{
        'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
        'info_dict': {
            'id': '61924494876951776',
            'ext': 'mp4',
            'title': 'Hyde Park Civilizace',
            'description': 'md5:fe93f6eda372d150759d11644ebbfb4a',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 3350,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
        'info_dict': {
            'id': '61924494877028507',
            'ext': 'mp4',
            'title': 'Hyde Park Civilizace: Bonus 01 - En',
            'description': 'English Subtittles',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 81.3,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # live stream
        'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
        'info_dict': {
            'id': 402,
            'ext': 'mp4',
            'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'is_live': True,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': 'Georestricted to Czech Republic',
    }, {
        # video with 18+ caution trailer
        'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
        'info_dict': {
            'id': '215562210900007-bogotart',
            'title': 'Queer: Bogotart',
            'description': 'Alternativní průvodce současným queer světem',
        },
        'playlist': [{
            'info_dict': {
                'id': '61924494876844842',
                'ext': 'mp4',
                'title': 'Queer: Bogotart (Varování 18+)',
                'duration': 10.2,
            },
        }, {
            'info_dict': {
                'id': '61924494877068022',
                'ext': 'mp4',
                'title': 'Queer: Bogotart (Queer)',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 1558.3,
            },
        }],
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Build a playlist result from every item of the page's playlist.

        The page is downloaded to find the playlist type and id, those are
        posted to the ``get-client-playlist`` endpoint, and the playlist URL
        it returns is downloaded to obtain the individual items.

        Raises ExtractorError (expected) when the page contains the
        territory-restriction message or the endpoint answers with
        ``error_region``.
        """
        # Rewrite 'porady' URLs to their 'ivysilani' form and drop any
        # '/video/' segment; the rewritten URL is used for all requests below.
        url = url.replace('/porady/', '/ivysilani/').replace('/video/', '')

        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')

        webpage = self._download_webpage(url, playlist_id)

        NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
        if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
            raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

        # Both values come from the same getPlaylistUrl([{...}], call
        # embedded in the page.
        typ = self._html_search_regex(
            r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type')
        episode_id = self._html_search_regex(
            r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id')

        data = {
            'playlist[0][type]': typ,
            'playlist[0][id]': episode_id,
            'requestUrl': compat_urllib_parse_urlparse(url).path,
            'requestSource': 'iVysilani',
        }

        req = sanitized_Request(
            'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
            data=urlencode_postdata(data))

        req.add_header('Content-type', 'application/x-www-form-urlencoded')
        req.add_header('x-addr', '127.0.0.1')
        req.add_header('X-Requested-With', 'XMLHttpRequest')
        req.add_header('Referer', url)

        playlistpage = self._download_json(req, playlist_id)

        playlist_url = playlistpage['url']
        if playlist_url == 'error_region':
            raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

        req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
        req.add_header('Referer', url)

        # Either value may be None when the page lacks the og: metadata.
        playlist_title = self._og_search_title(webpage, default=None)
        playlist_description = self._og_search_description(webpage, default=None)

        playlist = self._download_json(req, playlist_id)['playlist']
        playlist_len = len(playlist)

        entries = []
        for item in playlist:
            is_live = item.get('type') == 'LIVE'
            formats = []
            # Only the stream URLs are needed; the keys are not used.
            for stream_url in item['streamUrls'].values():
                formats.extend(self._extract_m3u8_formats(
                    stream_url, playlist_id, 'mp4',
                    entry_protocol='m3u8' if is_live else 'm3u8_native',
                    fatal=False))
            self._sort_formats(formats)

            item_id = item.get('id') or item['assetId']
            title = item['title']

            duration = float_or_none(item.get('duration'))
            thumbnail = item.get('previewImageUrl')

            # Subtitles are only looked up for VOD items that list some.
            subtitles = {}
            if item.get('type') == 'VOD':
                subs = item.get('subtitles')
                if subs:
                    subtitles = self.extract_subtitles(episode_id, subs)

            if playlist_len == 1:
                final_title = playlist_title or title
                if is_live:
                    final_title = self._live_title(final_title)
            elif playlist_title:
                final_title = '%s (%s)' % (playlist_title, title)
            else:
                # Without a page title, formatting would yield 'None (...)';
                # use the item's own title instead.
                final_title = title

            entries.append({
                'id': item_id,
                'title': final_title,
                # The page description is only attached to a lone item.
                'description': playlist_description if playlist_len == 1 else None,
                'thumbnail': thumbnail,
                'duration': duration,
                'formats': formats,
                'subtitles': subtitles,
                'is_live': is_live,
            })

        return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)

    def _get_subtitles(self, episode_id, subs):
        """Download the first subtitle track in *subs* and return it as SRT.

        *subs* must be a non-empty sequence of dicts with a 'url' key. The
        converted text is returned as a single 'srt' entry under the 'cs' key.
        """
        original_subtitles = self._download_webpage(
            subs[0]['url'], episode_id, 'Downloading subtitles')
        srt_subs = self._fix_subtitles(original_subtitles)
        return {
            'cs': [{
                'ext': 'srt',
                'data': srt_subs,
            }]
        }

    @staticmethod
    def _fix_subtitles(subtitles):
        """ Convert millisecond-based subtitles to SRT

        Lines of the form '<index>; <start_ms> <stop_ms>' become the index
        line followed by an 'HH:MM:SS,mmm --> HH:MM:SS,mmm' line; every other
        line is passed through unchanged. The result is joined with CRLF.
        """
        # Compiled once instead of being re-resolved for every line.
        cue_header_re = re.compile(r'^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$')

        def _msectotimecode(msec):
            """ Helper utility to convert milliseconds to timecode """
            # Peel off milliseconds, seconds, minutes and hours in turn;
            # the hours component wraps at 100.
            components = []
            for divider in [1000, 60, 60, 100]:
                components.append(msec % divider)
                msec //= divider
            return '{3:02}:{2:02}:{1:02},{0:03}'.format(*components)

        def _fix_subtitle(subtitle):
            for line in subtitle.splitlines():
                m = cue_header_re.match(line)
                if m:
                    yield m.group(1)
                    start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:])
                    yield '{0} --> {1}'.format(start, stop)
                else:
                    yield line

        return '\r\n'.join(_fix_subtitle(subtitles))