]>
Commit | Line | Data |
---|---|---|
e0b4cc48 | 1 | from __future__ import unicode_literals |
73e79f2a PH |
2 | |
3 | import re | |
73e79f2a | 4 | |
b2727d0b S |
5 | from .common import InfoExtractor |
6 | from ..utils import ( | |
7 | int_or_none, | |
8 | unified_strdate, | |
9 | xpath_text, | |
10 | determine_ext, | |
11 | qualities, | |
12 | float_or_none, | |
13 | ExtractorError, | |
14 | ) | |
73e79f2a PH |
15 | |
16 | ||
class DreiSatIE(InfoExtractor):
    """Extractor for videos of the 3sat Mediathek (3sat.de/mediathek)."""

    IE_NAME = '3sat'
    _VALID_URL = r'(?:https?://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
    _TESTS = [
        {
            'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
            'md5': 'be37228896d30a88f315b638900a026e',
            'info_dict': {
                'id': '45918',
                'ext': 'mp4',
                'title': 'Waidmannsheil',
                'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
                'uploader': 'SCHWEIZWEIT',
                'uploader_id': '100000210',
                'upload_date': '20140913'
            },
            'params': {
                'skip_download': True,  # m3u8 downloads
            }
        },
        {
            'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066',
            'only_matching': True,
        },
    ]

    def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
        """Build format dicts from a parsed SMIL document.

        Every ``<video>`` element references a ``<paramGroup>`` from the
        SMIL head; that group supplies the ``host``, ``app`` and the
        comma-separated ``protocols`` the stream is offered on.  One format
        is emitted per (video, protocol) pair.

        ``smil_url``, ``video_id``, ``f4m_params`` and ``transform_rtmp_url``
        are accepted but not used by this implementation.
        NOTE(review): presumably this overrides a base-class hook that
        ``self._extract_smil_formats`` calls -- confirm against InfoExtractor.

        Returns the list of format dicts after ``self._sort_formats``.
        """
        # Index the parameter groups by their xml:id attribute.
        param_groups = {}
        for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)):
            group_id = param_group.attrib.get(self._xpath_ns('id', 'http://www.w3.org/XML/1998/namespace'))
            param_groups[group_id] = dict(
                (param.get('name'), param.get('value')) for param in param_group)

        formats = []
        for video in smil.findall(self._xpath_ns('.//video', namespace)):
            src = video.get('src')
            if not src:
                continue
            # A video pointing at an unknown or incomplete paramGroup cannot
            # be turned into a URL; skip it instead of raising KeyError.
            param_group = param_groups.get(video.get('paramGroup'))
            if not param_group:
                continue
            protocols = param_group.get('protocols')
            host = param_group.get('host')
            if not protocols or not host:
                continue
            bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
            for proto in protocols.split(','):
                formats.append({
                    'url': '%s://%s' % (proto, host),
                    'app': param_group.get('app'),
                    'play_path': src,
                    'ext': 'flv',
                    # '%d' cannot format None, so only append the bitrate
                    # to the id when the SMIL actually declares one.
                    'format_id': proto if bitrate is None else '%s-%d' % (proto, bitrate),
                    'tbr': bitrate,
                })
        self._sort_formats(formats)
        return formats

    def extract_from_xml_url(self, video_id, xml_url):
        """Download the details XML at ``xml_url`` and build the info dict.

        Raises ExtractorError (expected) when the document carries a
        status code other than 'ok', and ExtractorError when the title
        element is missing.

        Returns a dict with id, title, description, duration, thumbnails,
        uploader, uploader_id, upload_date and formats.
        """
        doc = self._download_xml(
            xml_url, video_id,
            note='Downloading video info',
            errnote='Failed to download video info')

        status_code = doc.find('./status/statuscode')
        if status_code is not None and status_code.text != 'ok':
            code = status_code.text
            if code == 'notVisibleAnymore':
                message = 'Video %s is not available' % video_id
            else:
                message = '%s returned error: %s' % (self.IE_NAME, code)
            raise ExtractorError(message, expected=True)

        # fatal=True reports a missing title as an ExtractorError rather
        # than an AttributeError on None.
        title = xpath_text(doc, './/information/title', 'title', fatal=True)
        description = xpath_text(doc, './/information/detail', 'description')
        duration = int_or_none(xpath_text(doc, './/details/lengthSec', 'duration'))
        uploader = xpath_text(doc, './/details/originChannelTitle', 'uploader')
        uploader_id = xpath_text(doc, './/details/originChannelId', 'uploader id')
        upload_date = unified_strdate(xpath_text(doc, './/details/airtime', 'upload date'))

        def xml_to_thumbnails(fnode):
            """Convert teaserimage nodes into thumbnail dicts."""
            thumbnails = []
            for node in fnode:
                thumbnail_url = node.text
                if not thumbnail_url:
                    continue
                thumbnail = {
                    'url': thumbnail_url,
                }
                # The optional 'key' attribute has the form WIDTHxHEIGHT.
                m = re.match(r'^([0-9]+)x([0-9]+)$', node.attrib.get('key', ''))
                if m:
                    thumbnail['width'] = int(m.group(1))
                    thumbnail['height'] = int(m.group(2))
                thumbnails.append(thumbnail)
            return thumbnails

        thumbnails = xml_to_thumbnails(doc.findall('.//teaserimages/teaserimage'))

        format_nodes = doc.findall('.//formitaeten/formitaet')
        # Kept under its own name so the per-node quality string read in
        # the loop below cannot shadow the ranking callable.
        quality_rank = qualities(['veryhigh', 'high', 'med', 'low'])
        format_nodes.sort(key=lambda node: quality_rank(xpath_text(node, 'quality')))

        seen_format_ids = set()
        formats = []
        for fnode in format_nodes:
            video_url = xpath_text(fnode, 'url')
            if not video_url:
                continue
            is_available = 'http://www.metafilegenerator' not in video_url
            if not is_available:
                continue
            format_id = fnode.get('basetype')
            if not format_id:
                continue
            quality = xpath_text(fnode, './quality', 'quality')
            format_m = re.match(r'''(?x)
                (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
                (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
            ''', format_id)
            # An unexpected basetype layout yields no match; fall back to
            # whatever can be derived from the URL alone.
            basetype = format_m.groupdict() if format_m else {}

            ext = determine_ext(video_url, None) or basetype.get('container')
            # Manifest formats are expanded into several formats below, so
            # only direct formats get the quality appended to their id.
            if ext not in ('smil', 'f4m', 'm3u8') and quality:
                format_id = format_id + '-' + quality
            if format_id in seen_format_ids:
                continue

            if ext == 'meta':
                continue
            elif ext == 'smil':
                formats.extend(self._extract_smil_formats(
                    video_url, video_id, fatal=False))
            elif ext == 'm3u8':
                # the certificates are misconfigured (see
                # https://github.com/rg3/youtube-dl/issues/8665)
                if video_url.startswith('https://'):
                    continue
                formats.extend(self._extract_m3u8_formats(
                    video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    video_url, video_id, f4m_id=format_id, fatal=False))
            else:
                direct_format = {
                    'format_id': format_id,
                    'url': video_url,
                    'ext': ext,
                    'acodec': basetype.get('acodec'),
                    'vcodec': basetype.get('vcodec'),
                    'abr': int_or_none(xpath_text(fnode, './audioBitrate', 'abr'), 1000),
                    'vbr': int_or_none(xpath_text(fnode, './videoBitrate', 'vbr'), 1000),
                    'width': int_or_none(xpath_text(fnode, './width', 'width')),
                    'height': int_or_none(xpath_text(fnode, './height', 'height')),
                    'filesize': int_or_none(xpath_text(fnode, './filesize', 'filesize')),
                    'format_note': None,
                    '_available': is_available,
                }
                # Only set the protocol when the basetype declared one.
                proto = basetype.get('proto')
                if proto:
                    direct_format['protocol'] = proto.lower()
                formats.append(direct_format)
            seen_format_ids.add(format_id)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'thumbnails': thumbnails,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'upload_date': upload_date,
            'formats': formats,
        }

    def _real_extract(self, url):
        """Resolve the video id from ``url`` and extract via the details XML service."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
        return self.extract_from_xml_url(video_id, details_url)