]>
Commit | Line | Data |
---|---|---|
8cfb5bbf | 1 | # -*- coding: utf-8 -*- |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
8cfb5bbf | 5 | |
48246541 | 6 | from .common import InfoExtractor |
1cc79574 | 7 | from ..compat import ( |
8cfb5bbf | 8 | compat_urllib_request, |
9 | compat_urllib_parse, | |
3e72f5f1 | 10 | compat_urllib_parse_unquote, |
39201787 | 11 | compat_urllib_parse_urlparse, |
1cc79574 PH |
12 | ) |
13 | from ..utils import ( | |
39201787 | 14 | ExtractorError, |
02ec32a1 | 15 | float_or_none, |
8cfb5bbf | 16 | ) |
17 | ||
18 | ||
class CeskaTelevizeIE(InfoExtractor):
    """Extractor for videos hosted on www.ceskatelevize.cz (iVysilani).

    Both ``/porady/`` and ``/ivysilani/`` URLs are accepted; the former are
    rewritten to the latter before the page is downloaded.
    """

    _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'

    _TESTS = [
        {
            'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
            'info_dict': {
                'id': '214411058091220',
                'ext': 'mp4',
                'title': 'Hyde Park Civilizace',
                'description': 'Věda a současná civilizace. Interaktivní pořad - prostor pro vaše otázky a komentáře',
                # Raw literal: '\.' is not a valid escape in a normal string.
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 3350,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
            'info_dict': {
                'id': '14716',
                'ext': 'mp4',
                'title': 'První republika: Zpěvačka z Dupárny Bobina',
                'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 88.4,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
    ]

    def _real_extract(self, url):
        """Extract the info dict for the video page at ``url``.

        The page is downloaded, the playlist type/id embedded in its
        ``getPlaylistUrl(...)`` call are posted to the ``get-client-playlist``
        endpoint, and the playlist JSON that endpoint points to is used to
        collect formats, duration, thumbnail and subtitles.

        Returns:
            A dict with the keys ``id``, ``title``, ``description``,
            ``thumbnail``, ``duration``, ``formats`` and ``subtitles``.

        Raises:
            ExtractorError: if the page contains the "not available at your
                territory" notice, or the playlist endpoint answers with
                ``error_region``.
        """
        url = url.replace('/porady/', '/ivysilani/').replace('/video/', '')

        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
        if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
            raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

        typ = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type')
        episode_id = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id')

        data = {
            'playlist[0][type]': typ,
            'playlist[0][id]': episode_id,
            'requestUrl': compat_urllib_parse_urlparse(url).path,
            'requestSource': 'iVysilani',
        }

        # urlencode() returns text on Python 3, but the request body must be
        # bytes there (urllib raises TypeError for str). The urlencoded form
        # is pure ASCII, so encoding is lossless and a no-op on Python 2.
        req = compat_urllib_request.Request(
            'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
            data=compat_urllib_parse.urlencode(data).encode('ascii'))

        req.add_header('Content-type', 'application/x-www-form-urlencoded')
        # NOTE(review): the purpose of the 'x-addr' header is not visible
        # here; presumably the endpoint expects it -- confirm before removing.
        req.add_header('x-addr', '127.0.0.1')
        req.add_header('X-Requested-With', 'XMLHttpRequest')
        req.add_header('Referer', url)

        playlistpage = self._download_json(req, video_id)

        playlist_url = playlistpage['url']
        if playlist_url == 'error_region':
            raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

        # The returned playlist URL is percent-encoded; decode before use.
        req = compat_urllib_request.Request(compat_urllib_parse_unquote(playlist_url))
        req.add_header('Referer', url)

        playlist = self._download_json(req, video_id)

        # Only the first playlist entry is used.
        item = playlist['playlist'][0]
        formats = []
        for stream_url in item['streamUrls'].values():
            formats.extend(self._extract_m3u8_formats(stream_url, video_id, 'mp4'))
        self._sort_formats(formats)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        duration = float_or_none(item.get('duration'))
        thumbnail = item.get('previewImageUrl')

        subtitles = {}
        subs = item.get('subtitles')
        if subs:
            # extract_subtitles comes from the base class; presumably it
            # dispatches to _get_subtitles below -- verify in InfoExtractor.
            subtitles = self.extract_subtitles(episode_id, subs)

        return {
            'id': episode_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }

    def _get_subtitles(self, episode_id, subs):
        """Download the first subtitle track in ``subs`` and convert it to SRT.

        Args:
            episode_id: identifier passed to the download helper.
            subs: non-empty sequence of dicts; only ``subs[0]['url']`` is read.

        Returns:
            A dict mapping ``'cs'`` to a one-element list holding the
            converted subtitles (``ext`` ``'srt'``, text under ``data``).
        """
        original_subtitles = self._download_webpage(
            subs[0]['url'], episode_id, 'Downloading subtitles')
        srt_subs = self._fix_subtitles(original_subtitles)
        return {
            'cs': [{
                'ext': 'srt',
                'data': srt_subs,
            }]
        }

    @staticmethod
    def _fix_subtitles(subtitles):
        """ Convert millisecond-based subtitles to SRT

        Every line of the form ``<index>; <start_ms> <stop_ms>`` is replaced
        by two lines: the index, and an ``HH:MM:SS,mmm --> HH:MM:SS,mmm``
        timing line. All other lines are passed through unchanged. The
        resulting lines are joined with CRLF.
        """

        # Compiled once instead of on every input line.
        cue_header = re.compile(r"^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$")

        def _msectotimecode(msec):
            """ Helper utility to convert milliseconds to timecode """
            components = []
            # Peel off milliseconds, seconds, minutes, then hours; the final
            # divider keeps the hours field to two digits (wraps at 100).
            for divider in [1000, 60, 60, 100]:
                components.append(msec % divider)
                msec //= divider
            return "{3:02}:{2:02}:{1:02},{0:03}".format(*components)

        def _fix_subtitle(subtitle):
            for line in subtitle.splitlines():
                m = cue_header.match(line)
                if m:
                    yield m.group(1)
                    start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:])
                    yield "{0} --> {1}".format(start, stop)
                else:
                    yield line

        return "\r\n".join(_fix_subtitle(subtitles))