]>
Commit | Line | Data |
---|---|---|
8cfb5bbf | 1 | # -*- coding: utf-8 -*- |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
8cfb5bbf | 5 | |
48246541 | 6 | from .common import InfoExtractor |
1cc79574 | 7 | from ..compat import ( |
8cfb5bbf | 8 | compat_urllib_request, |
9 | compat_urllib_parse, | |
39201787 | 10 | compat_urllib_parse_urlparse, |
1cc79574 PH |
11 | ) |
12 | from ..utils import ( | |
39201787 | 13 | ExtractorError, |
02ec32a1 | 14 | float_or_none, |
8cfb5bbf | 15 | ) |
16 | ||
17 | ||
class CeskaTelevizeIE(InfoExtractor):
    """Extractor for video pages under www.ceskatelevize.cz/porady and /ivysilani."""

    _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'

    _TESTS = [
        {
            'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
            'info_dict': {
                'id': '214411058091220',
                'ext': 'mp4',
                'title': 'Hyde Park Civilizace',
                'description': 'Věda a současná civilizace. Interaktivní pořad - prostor pro vaše otázky a komentáře',
                # Raw literal: '\.' is not a valid escape in a plain string.
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 3350,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
            'info_dict': {
                'id': '14716',
                'ext': 'mp4',
                'title': 'První republika: Zpěvačka z Dupárny Bobina',
                'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.',
                # Raw literal: '\.' is not a valid escape in a plain string.
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 88.4,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the video page at ``url``.

        The page is downloaded to find the playlist type and episode id,
        which are posted to the ``get-client-playlist`` endpoint. The JSON
        answer carries the URL of the actual playlist, whose first item
        supplies the stream URLs, duration, preview image and subtitles.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnail``, ``duration``, ``formats`` and ``subtitles``.

        Raises ExtractorError (expected) when the page or the playlist
        endpoint reports that the content is not available in the
        requesting territory.
        """
        # Normalise to the /ivysilani/ form of the URL before matching.
        url = url.replace('/porady/', '/ivysilani/').replace('/video/', '')

        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
        if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
            raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

        # Both values come from the same getPlaylistUrl([{...}], ...) call
        # embedded in the page.
        typ = self._html_search_regex(
            r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type')
        episode_id = self._html_search_regex(
            r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id')

        data = {
            'playlist[0][type]': typ,
            'playlist[0][id]': episode_id,
            'requestUrl': compat_urllib_parse_urlparse(url).path,
            'requestSource': 'iVysilani',
        }

        # urlencode() returns text, but Python 3's urllib refuses a str POST
        # body (TypeError), so encode it. The urlencoded form is pure ASCII.
        req = compat_urllib_request.Request(
            'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
            data=compat_urllib_parse.urlencode(data).encode('ascii'))

        req.add_header('Content-type', 'application/x-www-form-urlencoded')
        req.add_header('x-addr', '127.0.0.1')
        req.add_header('X-Requested-With', 'XMLHttpRequest')
        req.add_header('Referer', url)

        playlistpage = self._download_json(req, video_id)

        playlist_url = playlistpage['url']
        # The endpoint signals a geo restriction through a sentinel value
        # in place of a real URL.
        if playlist_url == 'error_region':
            raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

        req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlist_url))
        req.add_header('Referer', url)

        playlist = self._download_json(req, video_id)

        # Only the first playlist entry is used.
        item = playlist['playlist'][0]
        formats = []
        # The keys of streamUrls are not needed, only the URLs themselves.
        for stream_url in item['streamUrls'].values():
            formats.extend(self._extract_m3u8_formats(stream_url, video_id, 'mp4'))
        self._sort_formats(formats)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        duration = float_or_none(item.get('duration'))
        thumbnail = item.get('previewImageUrl')

        subtitles = {}
        subs = item.get('subtitles')
        if subs:
            # NOTE(review): extract_subtitles is inherited; presumably it
            # forwards to _get_subtitles below - confirm in InfoExtractor.
            subtitles = self.extract_subtitles(episode_id, subs)

        return {
            'id': episode_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }

    def _get_subtitles(self, episode_id, subs):
        """Download the first subtitle track in ``subs`` and convert it to SRT.

        ``subs`` is indexed as a sequence of dicts with a ``url`` key; only
        ``subs[0]`` is used. The result maps the language code ``'cs'`` to a
        single entry holding the converted text under ``data``.
        """
        original_subtitles = self._download_webpage(
            subs[0]['url'], episode_id, 'Downloading subtitles')
        srt_subs = self._fix_subtitles(original_subtitles)
        return {
            'cs': [{
                'ext': 'srt',
                'data': srt_subs,
            }]
        }

    @staticmethod
    def _fix_subtitles(subtitles):
        """ Convert millisecond-based subtitles to SRT

        Every line of the form ``<index>; <start_ms> <stop_ms>`` becomes two
        lines: the index and an ``HH:MM:SS,mmm --> HH:MM:SS,mmm`` range.
        All other lines are passed through unchanged. The resulting lines
        are joined with CRLF.
        """

        def _msectotimecode(msec):
            """ Helper utility to convert milliseconds to timecode """
            components = []
            # Peel off milliseconds, seconds, minutes and finally hours
            # (hours wrap at 100 so they always fit in two digits).
            for divider in [1000, 60, 60, 100]:
                components.append(msec % divider)
                msec //= divider
            return "{3:02}:{2:02}:{1:02},{0:03}".format(*components)

        def _fix_subtitle(subtitle):
            """ Yield the SRT lines for the given raw subtitle text """
            for line in subtitle.splitlines():
                m = re.match(r"^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$", line)
                if m:
                    yield m.group(1)
                    start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:])
                    yield "{0} --> {1}".format(start, stop)
                else:
                    yield line

        return "\r\n".join(_fix_subtitle(subtitles))