]>
Commit | Line | Data |
---|---|---|
df1d7da2 | 1 | import re |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..utils import ( | |
5 | ExtractorError, | |
6 | ) | |
7 | ||
09dacfa5 | 8 | |
class MDRIE(InfoExtractor):
    """Information extractor for audio/video pages of the mdr.de Mediathek.

    The page HTML supplies the title and the path of an ``-avCustom.xml``
    document; the ``assets/asset`` entries of that XML become the formats.
    """

    _VALID_URL = r'^(?P<domain>(?:https?://)?(?:www\.)?mdr\.de)/mediathek/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)_.*'

    # No tests, MDR regularly deletes its videos

    @staticmethod
    def _asset_int(asset, tag, divisor=1):
        """Return the integer text of child ``tag``, floor-divided by ``divisor``.

        Returns None when ``asset`` has no direct child named ``tag`` or when
        that child's text is not an integer, so that a single incomplete
        asset does not abort the whole extraction.
        """
        text = asset.findtext(tag)
        try:
            return int(text) // divisor
        except (TypeError, ValueError):
            # TypeError: element missing (text is None); ValueError: not a number.
            return None

    def _real_extract(self, url):
        """Build the info dict for the MDR page at ``url``.

        Returns a dict with the keys ``id``, ``title`` and ``formats``.

        Raises ExtractorError when the XML document yields no asset with a
        progressive download URL.
        """
        m = re.match(self._VALID_URL, url)
        video_id = m.group('video_id')
        domain = m.group('domain')

        # determine title and media streams from webpage
        html = self._download_webpage(url, video_id)

        title = self._html_search_regex(r'<h2>(.*?)</h2>', html, u'title')
        # The dot before "xml" is escaped so that only a literal ".xml" matches.
        xmlurl = self._search_regex(
            r'(/mediathek/(?:.+)/(?:video|audio)[0-9]+-avCustom\.xml)', html, u'XML URL')

        # The XML path found in the page is site-relative, hence the domain prefix.
        doc = self._download_xml(domain + xmlurl, video_id)
        formats = []
        for asset in doc.findall('./assets/asset'):
            media_url = asset.findtext('.//progressiveDownloadUrl')
            if not media_url:
                # Assets without a (non-empty) progressive URL are not usable here.
                continue

            media_type = asset.find('mediaType').text
            # Bitrates are divided by 1000 before being stored as abr/vbr.
            abr = self._asset_int(asset, 'bitrateAudio', 1000)

            fmt = {'url': media_url}
            metadata = [
                ('abr', abr),
                ('filesize', self._asset_int(asset, 'fileSize')),
            ]

            if asset.find('bitrateVideo') is None:
                # No video bitrate element: treat the asset as audio-only.
                fmt['vcodec'] = 'none'
                id_bitrate = abr
            else:
                id_bitrate = self._asset_int(asset, 'bitrateVideo', 1000)
                metadata.extend([
                    ('vbr', id_bitrate),
                    ('width', self._asset_int(asset, 'frameWidth')),
                    ('height', self._asset_int(asset, 'frameHeight')),
                ])

            # Only record metadata that could actually be parsed.
            fmt.update((key, value) for key, value in metadata if value is not None)

            if id_bitrate is None:
                fmt['format_id'] = media_type
            else:
                fmt['format_id'] = u'%s-%d' % (media_type, id_bitrate)
            formats.append(fmt)

        if not formats:
            raise ExtractorError(u'Could not find any valid formats')

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }