]>
Commit | Line | Data |
---|---|---|
1 | import re | |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..utils import ( | |
5 | ExtractorError, | |
6 | ) | |
7 | ||
class MDRIE(InfoExtractor):
    """Extractor for video and audio pages of the mdr.de Mediathek.

    The page URL carries the media type ("video" or "audio") and the numeric
    id.  The page HTML links to an ``...-avCustom.xml`` document that lists
    one ``<asset>`` element per available stream; the best one is selected.
    """

    _VALID_URL = r'^(?P<domain>(?:https?://)?(?:www\.)?mdr\.de)/mediathek/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)_.*'
    _TITLE = r'<h2>(?P<title1>[^<]+)<span>(?P<title2>[^<]+)</span></h2>'

    _MEDIA_XML = r'(?P<xmlurl>/mediathek/(.+)/(video|audio)([0-9]+)-avCustom.xml)'
    _MEDIA_STREAM_VIDEO = r'<asset>.*<frameWidth>(?P<frameWidth>[0-9]+)</frameWidth>.*<flashMediaServerApplicationURL>(?P<flashMediaServerApplicationURL>[^<]+)</flashMediaServerApplicationURL><flashMediaServerURL>(?P<flashMediaServerURL>[^<]+)</flashMediaServerURL>.*<progressiveDownloadUrl>(?P<progressiveDownloadUrl>[^<]+)</progressiveDownloadUrl></asset>'
    _MEDIA_STREAM_AUDIO = r'<asset>.*<mediaType>(?P<mediaType>[A-Z0-9]+)</mediaType><bitrateAudio>(?P<bitrateAudio>[0-9]+)</bitrateAudio>.*<flashMediaServerApplicationURL>(?P<flashMediaServerApplicationURL>[^<]+)</flashMediaServerApplicationURL><flashMediaServerURL>(?P<flashMediaServerURL>[^<]+)</flashMediaServerURL>.*<progressiveDownloadUrl>(?P<progressiveDownloadUrl>[^<]+)</progressiveDownloadUrl></asset>'

    # URL prefixes that identify a plain HTTP(S) progressive download.
    _HTTP_PREFIXES = ('http://', 'https://')

    # No u'skip' entries: the HTTP download is preferred, so rtmpdump is
    # optional for these tests.
    _TESTS = [{
        u'url': u'http://www.mdr.de/mediathek/themen/nachrichten/video165624_zc-c5c7de76_zs-3795826d.html',
        u'file': u'165624.mp4',
        u'md5': u'95165945756198b8fa2dea10f0b04614',
        u'info_dict': {
            u"title": u"MDR aktuell Eins30 09.12.2013, 22:48 Uhr"
        },
    },
    {
        # The URL must not start with whitespace: _VALID_URL is anchored
        # with '^' and would not match otherwise.
        u'url': u'http://www.mdr.de/mediathek/radio/mdr1-radio-sachsen/audio718370_zc-67b21197_zs-1b9b2483.html',
        u'file': u'718370.mp4',
        u'md5': u'4a5b1fbb5519fb0d929c384b6ff7cb8b',
        u'info_dict': {
            u"title": u"MDR 1 RADIO SACHSEN 10.12.2013, 05:00 Uhr"
        },
    }]

    @staticmethod
    def _one_asset_per_line(xml):
        """Return *xml* reflowed so that every <asset> element is on its own line.

        The stream patterns use ``.*``, which does not cross newlines; putting
        each asset on a separate line keeps one match from spanning several
        assets.
        """
        flat = xml.replace('\n', '').replace('\r', '')
        return flat.replace('<asset>', '\n<asset>').replace('</asset>', '</asset>\n')

    def _select_stream(self, xml, mediatype):
        """Pick the best stream description from the reflowed asset XML.

        Video streams are ranked by ``frameWidth``; audio streams are limited
        to media type "MP4" and ranked by ``bitrateAudio``.  Streams with an
        HTTP(S) progressive download URL are preferred; if there are none, the
        best remaining stream is returned so the caller can try RTMP.

        Returns the ``groupdict()`` of the chosen regex match.
        Raises ExtractorError if no usable stream is listed.
        """
        if mediatype == "video":
            pattern, quality_key = self._MEDIA_STREAM_VIDEO, "frameWidth"
        else:
            pattern, quality_key = self._MEDIA_STREAM_AUDIO, "bitrateAudio"

        streams = [mo.groupdict() for mo in re.finditer(pattern, xml)]
        if mediatype != "video":
            # choose default media type (MP4) for now
            streams = [s for s in streams if s["mediaType"] == "MP4"]
        if not streams:
            # Guard explicitly: max() on an empty list would raise ValueError.
            raise ExtractorError(u'no media found')

        http_streams = [
            s for s in streams
            if s["progressiveDownloadUrl"].startswith(self._HTTP_PREFIXES)
        ]
        # choose highest quality for now
        return max(http_streams or streams, key=lambda s: int(s[quality_key]))

    def _real_extract(self, url):
        """Extract id, title and media URL for an mdr.de Mediathek page.

        Returns a one-element list holding the info dict ('id', 'title',
        'ext', 'url' and, for RTMP streams, 'play_path').
        Raises ExtractorError if the title, the asset XML link or a usable
        stream cannot be found.
        """
        # determine video id from url
        m = re.match(self._VALID_URL, url)
        video_id = m.group('video_id')
        domain = m.group('domain')
        mediatype = m.group('type')

        # determine title and media streams from webpage
        html = self._download_webpage(url, video_id)
        t = re.search(self._TITLE, html)
        if not t:
            raise ExtractorError(u'no title found')
        title = t.group('title1') + t.group('title2')
        m = re.search(self._MEDIA_XML, html)
        if not m:
            raise ExtractorError(u'no xml found')
        xmlurl = m.group('xmlurl')
        xml = self._one_asset_per_line(
            self._download_webpage(domain + xmlurl, video_id, 'download XML'))
        stream = self._select_stream(xml, mediatype)

        # there's two possibilities: RTMP stream or HTTP download
        info = {'id': video_id, 'title': title, 'ext': 'mp4'}
        download_url = stream["progressiveDownloadUrl"]
        if download_url.startswith(self._HTTP_PREFIXES):
            # Validate with an exception rather than assert, which is
            # stripped when Python runs with -O.
            if not download_url.endswith('.mp4'):
                raise ExtractorError(
                    u'unexpected progressive download URL: %s' % download_url)
            info["url"] = download_url
        else:
            self.to_screen(u'RTMP download detected')
            if not stream['flashMediaServerURL'].startswith('mp4:'):
                raise ExtractorError(
                    u'unexpected RTMP play path: %s' % stream['flashMediaServerURL'])
            info["url"] = stream["flashMediaServerApplicationURL"]
            info["play_path"] = stream['flashMediaServerURL']
        return [info]