]>
Commit | Line | Data |
---|---|---|
8cfb5bbf | 1 | # -*- coding: utf-8 -*- |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
8cfb5bbf | 5 | |
48246541 | 6 | from .common import InfoExtractor |
1cc79574 | 7 | from ..compat import ( |
8cfb5bbf | 8 | compat_urllib_parse, |
3e72f5f1 | 9 | compat_urllib_parse_unquote, |
39201787 | 10 | compat_urllib_parse_urlparse, |
1cc79574 PH |
11 | ) |
12 | from ..utils import ( | |
39201787 | 13 | ExtractorError, |
02ec32a1 | 14 | float_or_none, |
5c2266df | 15 | sanitized_Request, |
8cfb5bbf | 16 | ) |
17 | ||
18 | ||
class CeskaTelevizeIE(InfoExtractor):
    """Extractor for ceskatelevize.cz "porady" and "ivysilani" pages.

    A page resolves to a playlist of one or more items (for example a
    caution trailer followed by the actual programme); every item becomes
    one entry of the returned playlist.
    """

    _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$'
    _TESTS = [{
        'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
        'info_dict': {
            'id': '61924494876951776',
            'ext': 'mp4',
            'title': 'Hyde Park Civilizace',
            'description': 'md5:fe93f6eda372d150759d11644ebbfb4a',
            # Raw string: '\.' is not a valid escape in a plain literal.
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 3350,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
        'info_dict': {
            'id': '61924494876844374',
            'ext': 'mp4',
            'title': 'První republika: Zpěvačka z Dupárny Bobina',
            'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 88.4,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # video with 18+ caution trailer
        'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
        'info_dict': {
            'id': '215562210900007-bogotart',
            'title': 'Queer: Bogotart',
            'description': 'Alternativní průvodce současným queer světem',
        },
        'playlist': [{
            'info_dict': {
                'id': '61924494876844842',
                'ext': 'mp4',
                'title': 'Queer: Bogotart (Varování 18+)',
                'duration': 10.2,
            },
        }, {
            'info_dict': {
                'id': '61924494877068022',
                'ext': 'mp4',
                'title': 'Queer: Bogotart (Queer)',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 1558.3,
            },
        }],
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Build a playlist result for the programme page at ``url``.

        Steps: rewrite the URL to its ``/ivysilani/`` form, download the
        page, read the playlist type and id out of the ``getPlaylistUrl``
        call embedded in it, ask the ``get-client-playlist`` endpoint for
        the playlist location, then download that playlist and turn each of
        its items into an entry.

        Raises ExtractorError (expected) when the page carries the
        territory restriction notice or the endpoint answers with
        ``error_region``.
        """
        url = url.replace('/porady/', '/ivysilani/').replace('/video/', '')

        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')

        webpage = self._download_webpage(url, playlist_id)

        NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
        if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
            raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

        typ = self._html_search_regex(
            r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type')
        episode_id = self._html_search_regex(
            r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id')

        data = {
            'playlist[0][type]': typ,
            'playlist[0][id]': episode_id,
            'requestUrl': compat_urllib_parse_urlparse(url).path,
            'requestSource': 'iVysilani',
        }

        # The request body has to be bytes: on Python 3 urlencode() returns
        # text, which urllib refuses to send as POST data.
        req = sanitized_Request(
            'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
            data=compat_urllib_parse.urlencode(data).encode('utf-8'))

        req.add_header('Content-type', 'application/x-www-form-urlencoded')
        req.add_header('x-addr', '127.0.0.1')
        req.add_header('X-Requested-With', 'XMLHttpRequest')
        req.add_header('Referer', url)

        playlistpage = self._download_json(req, playlist_id)

        playlist_url = playlistpage['url']
        # The endpoint reports a territory restriction through the url field.
        if playlist_url == 'error_region':
            raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

        req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
        req.add_header('Referer', url)

        playlist_title = self._og_search_title(webpage)
        playlist_description = self._og_search_description(webpage)

        playlist = self._download_json(req, playlist_id)['playlist']
        playlist_len = len(playlist)

        entries = []
        for item in playlist:
            formats = []
            # Only the stream URLs are needed; the keys of streamUrls are
            # not used.
            for stream_url in item['streamUrls'].values():
                formats.extend(self._extract_m3u8_formats(
                    stream_url, playlist_id, 'mp4', entry_protocol='m3u8_native'))
            self._sort_formats(formats)

            item_id = item.get('id') or item['assetId']
            title = item['title']

            duration = float_or_none(item.get('duration'))
            thumbnail = item.get('previewImageUrl')

            # Subtitles are only looked up for items of type 'VOD'.
            subtitles = {}
            if item.get('type') == 'VOD':
                subs = item.get('subtitles')
                if subs:
                    subtitles = self.extract_subtitles(episode_id, subs)

            # A single-item playlist reuses the page title and description;
            # otherwise the item title is appended to tell entries apart.
            entries.append({
                'id': item_id,
                'title': playlist_title if playlist_len == 1 else '%s (%s)' % (playlist_title, title),
                'description': playlist_description if playlist_len == 1 else None,
                'thumbnail': thumbnail,
                'duration': duration,
                'formats': formats,
                'subtitles': subtitles,
            })

        return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)

    def _get_subtitles(self, episode_id, subs):
        """Download the first subtitle track in ``subs`` and return it as SRT.

        ``subs`` must be a non-empty sequence whose first element has a
        ``'url'`` key. The result maps the language code ``'cs'`` to a
        single SRT entry holding the converted text.
        """
        original_subtitles = self._download_webpage(
            subs[0]['url'], episode_id, 'Downloading subtitles')
        srt_subs = self._fix_subtitles(original_subtitles)
        return {
            'cs': [{
                'ext': 'srt',
                'data': srt_subs,
            }]
        }

    @staticmethod
    def _fix_subtitles(subtitles):
        """ Convert millisecond-based subtitles to SRT """

        def _msectotimecode(msec):
            """ Helper utility to convert milliseconds to timecode """
            components = []
            # Peel off milliseconds, seconds, minutes and (two-digit) hours.
            for divider in [1000, 60, 60, 100]:
                components.append(msec % divider)
                msec //= divider
            return "{3:02}:{2:02}:{1:02},{0:03}".format(*components)

        def _fix_subtitle(subtitle):
            """ Yield SRT lines; cue headers "N; start stop" are expanded """
            for line in subtitle.splitlines():
                m = re.match(r"^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$", line)
                if m:
                    # Cue index on its own line, then the timecode range.
                    yield m.group(1)
                    start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:])
                    yield "{0} --> {1}".format(start, stop)
                else:
                    yield line

        return "\r\n".join(_fix_subtitle(subtitles))