]>
Commit | Line | Data |
---|---|---|
d5822b96 PH |
1 | import re |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..utils import ( | |
5 | ExtractorError, | |
6 | unescapeHTML, | |
7 | ) | |
8 | ||
class ZDFIE(InfoExtractor):
    """Information extractor for videos of the ZDF Mediathek (www.zdf.de)."""

    _VALID_URL = r'^http://www\.zdf\.de\/ZDFmediathek\/(.*beitrag\/video\/)(?P<video_id>[^/\?]+)(?:\?.*)?'
    # Headline of the video page, used as the video title.
    _TITLE = r'<h1(?: class="beitragHeadline")?>(?P<title>.*)</h1>'
    # "play" links on the video page; each match yields the link target
    # ('video_url'), its media type ('media_type') and its quality ('quality').
    _MEDIA_STREAM = r'<a href="(?P<video_url>.+(?P<media_type>.streaming).+/zdf/(?P<quality>[^\/]+)/[^"]*)".+class="play".+>'
    # Patterns for the final stream URL inside the document the "play" link
    # points to.
    _MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"'
    _RTSP_STREAM = r'(?P<video_url>rtsp://[^"]*.mp4)'

    def _real_extract(self, url):
        """Extract the stream URL and metadata for a ZDF Mediathek video.

        Args:
            url: Address of the video page; must match ``_VALID_URL``.

        Returns:
            A list holding a single dict with the keys ``id``, ``url``,
            ``title`` and ``ext``.

        Raises:
            ExtractorError: If the URL is invalid, the page offers no usable
                stream, or the title, stream URL or file extension cannot be
                extracted.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('video_id')

        html = self._download_webpage(url, video_id)
        streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)]
        # A list comprehension never yields None, so test for emptiness.
        if not streams:
            raise ExtractorError(u'No media url found.')

        # s['media_type'] == 'wstreaming' -> use 'Windows Media Player' and mms url
        # s['media_type'] == 'hstreaming' -> use 'Quicktime' and rtsp url
        # Only 'wstreaming' is considered ('hstreaming' - rtsp is not working).
        # Prefer the highest quality: 'veryhigh' (dsl2000mbit) first, then
        # '300' (dsl1000mbit).
        stream_ = None
        for quality in ('veryhigh', '300'):
            stream_ = next(
                (s for s in streams
                 if s['quality'] == quality and s['media_type'] == 'wstreaming'),
                None)
            if stream_ is not None:
                break
        if stream_ is None:
            raise ExtractorError(u'No stream found.')

        # The "play" link does not point at the media itself but at a document
        # that in turn contains the mms:// or rtsp:// URL.
        media_link = self._download_webpage(stream_['video_url'], video_id, 'Get stream URL')

        self.report_extraction(video_id)
        mobj = re.search(self._TITLE, html)
        if mobj is None:
            raise ExtractorError(u'Cannot extract title')
        title = unescapeHTML(mobj.group('title'))

        # Try the mms:// link first and fall back to an rtsp:// link.
        mobj = re.search(self._MMS_STREAM, media_link)
        if mobj is None:
            mobj = re.search(self._RTSP_STREAM, media_link)
        if mobj is None:
            raise ExtractorError(u'Cannot extract mms:// or rtsp:// URL')
        mms_url = mobj.group('video_url')

        # The extension is whatever follows the last dot of the stream URL.
        mobj = re.search(r'(.*)[.](?P<ext>[^.]+)', mms_url)
        if mobj is None:
            raise ExtractorError(u'Cannot extract extention')
        ext = mobj.group('ext')

        return [{
            'id': video_id,
            'url': mms_url,
            'title': title,
            'ext': ext,
        }]