]>
Commit | Line | Data |
---|---|---|
a2e6db36 | 1 | # coding: utf-8 |
919052d0 | 2 | from __future__ import unicode_literals |
a2e6db36 | 3 | |
9abd500a | 4 | import functools |
d5822b96 PH |
5 | import re |
6 | ||
7 | from .common import InfoExtractor | |
8 | from ..utils import ( | |
61224dbc | 9 | int_or_none, |
02dbf93f | 10 | unified_strdate, |
9abd500a | 11 | OnDemandPagedList, |
7b091c37 | 12 | xpath_text, |
a5c1d955 | 13 | determine_ext, |
14 | qualities, | |
15 | float_or_none, | |
6b461026 | 16 | ExtractorError, |
d5822b96 PH |
17 | ) |
18 | ||
0b7c2485 | 19 | |
class ZDFIE(InfoExtractor):
    """Extractor for single ZDF Mediathek videos.

    Accepts the internal ``zdf:<id>`` / ``zdf:video:<id>`` forms as well as
    ZDFmediathek ``beitrag`` page URLs, and resolves them through the
    ``beitragsDetails`` XML service.
    """

    _VALID_URL = r'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'

    _TESTS = [{
        'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt',
        'info_dict': {
            'id': '2037704',
            'ext': 'webm',
            'title': 'ZDFspezial - Ende des Machtpokers',
            'description': 'Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial "Ende des Machtpokers - Große Koalition für Deutschland".',
            'duration': 1022,
            'uploader': 'spezial',
            'uploader_id': '225948',
            'upload_date': '20131127',
        },
        'skip': 'Videos on ZDF.de are depublicised in short order',
    }]

    def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
        """Build one format per (video, protocol) pair of a SMIL document.

        Each ``<video>`` element refers to a ``<paramGroup>`` in the SMIL head
        whose ``host``, ``app`` and ``protocols`` params describe where the
        stream is served from.  ``smil_url``, ``video_id``, ``f4m_params`` and
        ``transform_rtmp_url`` are accepted for signature compatibility and
        are not used here.

        Returns the list of format dicts after passing it to
        ``self._sort_formats``.
        """
        xml_id_attr = self._xpath_ns('id', 'http://www.w3.org/XML/1998/namespace')
        param_groups = {}
        for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)):
            params = {}
            for param in param_group:
                params[param.get('name')] = param.get('value')
            param_groups[param_group.attrib.get(xml_id_attr)] = params

        formats = []
        for video in smil.findall(self._xpath_ns('.//video', namespace)):
            src = video.get('src')
            if not src:
                continue
            # A video that points at an unknown or incomplete param group
            # cannot be turned into a URL; skip it instead of raising KeyError.
            param_group = param_groups.get(video.get('paramGroup'))
            if not param_group:
                continue
            host = param_group.get('host')
            protocols = param_group.get('protocols')
            if not host or not protocols:
                continue
            bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
            for proto in protocols.split(','):
                # '%d' cannot format None, so only append the bitrate to the
                # format id when one was actually given.
                if bitrate is None:
                    format_id = proto
                else:
                    format_id = '%s-%d' % (proto, bitrate)
                formats.append({
                    'url': '%s://%s' % (proto, host),
                    'app': param_group.get('app'),
                    'play_path': src,
                    'ext': 'flv',
                    'format_id': format_id,
                    'tbr': bitrate,
                })
        self._sort_formats(formats)
        return formats

    def _parse_zdf_thumbnails(self, doc):
        """Collect thumbnail dicts from the ``teaserimage`` nodes of ``doc``.

        A ``key`` attribute of the form ``<width>x<height>`` is parsed into
        integer ``width`` and ``height`` entries; nodes without text are
        skipped.
        """
        thumbnails = []
        for node in doc.findall('.//teaserimages/teaserimage'):
            thumbnail_url = node.text
            if not thumbnail_url:
                continue
            thumbnail = {
                'url': thumbnail_url,
            }
            size_m = re.match(r'^([0-9]+)x([0-9]+)$', node.attrib.get('key', ''))
            if size_m:
                thumbnail['width'] = int(size_m.group(1))
                thumbnail['height'] = int(size_m.group(2))
            thumbnails.append(thumbnail)
        return thumbnails

    def _parse_zdf_formats(self, doc, video_id):
        """Collect format dicts from the ``formitaet`` nodes of ``doc``.

        Manifest URLs (smil, m3u8, f4m) are expanded through the matching
        ``_extract_*_formats`` helper with ``fatal=False``; every other URL
        becomes a single direct format.  The returned list is unsorted.
        """
        quality_preference = qualities(['veryhigh', 'high', 'med', 'low'])

        def node_quality(fnode):
            return quality_preference(xpath_text(fnode, 'quality'))

        seen_format_ids = set()
        formats = []
        for fnode in sorted(doc.findall('.//formitaeten/formitaet'), key=node_quality):
            url_node = fnode.find('url')
            video_url = url_node.text if url_node is not None else None
            # Entries without a URL, or pointing at the metafilegenerator
            # host, are treated as unavailable.
            if not video_url or 'http://www.metafilegenerator' in video_url:
                continue
            format_id = fnode.attrib.get('basetype')
            if not format_id:
                continue
            format_m = re.match(r'''(?x)
                (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
                (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
            ''', format_id)
            # An unexpected basetype simply yields no codec/container hints.
            basetype = format_m.groupdict() if format_m else {}

            ext = determine_ext(video_url, None) or basetype.get('container')
            quality_label = xpath_text(fnode, './quality', 'quality')
            if ext not in ('smil', 'f4m', 'm3u8') and quality_label is not None:
                format_id = format_id + '-' + quality_label
            if format_id in seen_format_ids:
                continue

            if ext == 'meta':
                continue
            elif ext == 'smil':
                formats.extend(self._extract_smil_formats(
                    video_url, video_id, fatal=False))
            elif ext == 'm3u8':
                # the certificates are misconfigured (see
                # https://github.com/rg3/youtube-dl/issues/8665)
                if video_url.startswith('https://'):
                    continue
                formats.extend(self._extract_m3u8_formats(
                    video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    video_url, video_id, f4m_id=format_id, fatal=False))
            else:
                fmt = {
                    'format_id': format_id,
                    'url': video_url,
                    'ext': ext,
                    'acodec': basetype.get('acodec'),
                    'vcodec': basetype.get('vcodec'),
                    'abr': int_or_none(xpath_text(fnode, './audioBitrate', 'abr'), 1000),
                    'vbr': int_or_none(xpath_text(fnode, './videoBitrate', 'vbr'), 1000),
                    'width': int_or_none(xpath_text(fnode, './width', 'width')),
                    'height': int_or_none(xpath_text(fnode, './height', 'height')),
                    'filesize': int_or_none(xpath_text(fnode, './filesize', 'filesize')),
                    'format_note': None,
                    # Unavailable entries were skipped above.
                    '_available': True,
                }
                proto = basetype.get('proto')
                if proto:
                    fmt['protocol'] = proto.lower()
                formats.append(fmt)
            seen_format_ids.add(format_id)
        return formats

    def extract_from_xml_url(self, video_id, xml_url):
        """Download the details XML at ``xml_url`` and build the info dict.

        Raises ExtractorError (``expected=True``) when the document carries a
        status code other than ``ok``.
        """
        doc = self._download_xml(
            xml_url, video_id,
            note='Downloading video info',
            errnote='Failed to download video info')

        status_code = doc.find('./status/statuscode')
        if status_code is not None and status_code.text != 'ok':
            code = status_code.text
            if code == 'notVisibleAnymore':
                message = 'Video %s is not available' % video_id
            else:
                message = '%s returned error: %s' % (self.IE_NAME, code)
            raise ExtractorError(message, expected=True)

        title = doc.find('.//information/title').text

        subtitles = {}
        captions_url = doc.find('.//caption/url')
        if captions_url is not None and captions_url.text:
            subtitles['de'] = [{
                'url': captions_url.text,
                'ext': 'ttml',
            }]

        formats = self._parse_zdf_formats(doc, video_id)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': xpath_text(doc, './/information/detail', 'description'),
            'duration': int_or_none(xpath_text(doc, './/details/lengthSec', 'duration')),
            'thumbnails': self._parse_zdf_thumbnails(doc),
            'uploader': xpath_text(doc, './/details/originChannelTitle', 'uploader'),
            'uploader_id': xpath_text(doc, './/details/originChannelId', 'uploader id'),
            'upload_date': unified_strdate(xpath_text(doc, './/details/airtime', 'upload date')),
            'formats': formats,
            'subtitles': subtitles,
        }

    def _real_extract(self, url):
        """Resolve ``url`` to a video id and extract it via the XML service."""
        video_id = self._match_id(url)
        xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
        return self.extract_from_xml_url(video_id, xml_url)
8560c618 | 207 | |
8560c618 | 208 | |
class ZDFChannelIE(InfoExtractor):
    """Extractor for ZDF Mediathek channels (``kanaluebersicht`` pages).

    Produces a playlist whose entries are fetched lazily, ``_PAGE_SIZE``
    teasers per request, from the ``aktuellste`` XML service.
    """

    _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/(?:[^/]+/)?)(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic',
        'info_dict': {
            'id': '1586442',
        },
        'playlist_count': 3,
    }, {
        'url': 'http://www.zdf.de/ZDFmediathek/kanaluebersicht/aktuellste/332',
        'only_matching': True,
    }, {
        'url': 'http://www.zdf.de/ZDFmediathek/kanaluebersicht/meist-gesehen/332',
        'only_matching': True,
    }, {
        'url': 'http://www.zdf.de/ZDFmediathek/kanaluebersicht/_/1798716?bc=nrt;nrm?flash=off',
        'only_matching': True,
    }]
    _PAGE_SIZE = 50

    def _fetch_page(self, channel_id, page):
        """Yield ``url`` entries for the teasers on zero-based ``page``.

        Only teasers of type ``video`` or ``topic`` are yielded; each entry
        points at the internal ``zdf:<type>:<assetId>`` URL.
        """
        offset = page * self._PAGE_SIZE
        xml_url = (
            'http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset=%d&maxLength=%d&id=%s'
            % (offset, self._PAGE_SIZE, channel_id))
        doc = self._download_xml(
            xml_url, channel_id,
            note='Downloading channel info',
            errnote='Failed to download channel info')

        # NOTE(review): xpath_text is assumed to return None for a missing
        # node, as its use for optional fields elsewhere in this file
        # suggests - confirm against ..utils.
        title = xpath_text(doc, './/information/title')
        description = xpath_text(doc, './/information/detail')
        for asset in doc.findall('.//teasers/teaser'):
            # Filter on the type first so that an irrelevant teaser without
            # an asset id cannot abort the whole page.
            a_type = xpath_text(asset, './type')
            if a_type not in ('video', 'topic'):
                continue
            a_id = xpath_text(asset, './details/assetId')
            if not a_id:
                continue
            yield {
                '_type': 'url',
                'playlist_title': title,
                'playlist_description': description,
                'url': 'zdf:%s:%s' % (a_type, a_id),
            }

    def _real_extract(self, url):
        """Return a playlist result whose entries are paged in on demand."""
        channel_id = self._match_id(url)
        entries = OnDemandPagedList(
            functools.partial(self._fetch_page, channel_id), self._PAGE_SIZE)

        return {
            '_type': 'playlist',
            'id': channel_id,
            'entries': entries,
        }