]>
Commit | Line | Data |
---|---|---|
d5822b96 PH |
1 | import re |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..utils import ( | |
5 | ExtractorError, | |
6 | ) | |
7 | ||
class ARDIE(InfoExtractor):
    """Information extractor for ardmediathek.de / mediathek.daserste.de pages."""

    # Accepts both hosts; the last path segment is captured as ``video_id``.
    _VALID_URL = r'^(?:https?://)?(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?'
    # Page headline, used as the video title.
    _TITLE = r'<h1(?: class="boxTopHeadline")?>(?P<title>.*)</h1>'
    # One match per ``mediaCollection.addMediaStream(...)`` call in the page.
    _MEDIA_STREAM = r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)'
    _TEST = {
        u'url': u'http://www.ardmediathek.de/das-erste/tagesschau-in-100-sek?documentId=14077640',
        u'file': u'14077640.mp4',
        u'md5': u'6ca8824255460c787376353f9e20bbd8',
        u'info_dict': {
            u"title": u"11.04.2013 09:23 Uhr - Tagesschau in 100 Sekunden"
        },
        u'skip': u'Requires rtmpdump'
    }

    def _real_extract(self, url):
        """Extract the video described by *url*.

        Returns a one-element list holding an info dict with the keys
        ``id``, ``title``, ``ext`` and ``url`` (plus ``play_path`` for RTMP
        streams).

        Raises ExtractorError when the URL or the downloaded page does not
        have the expected shape, or when the page announces no streams.
        """
        # Prefer the numeric documentId query parameter as the video id;
        # fall back to the last path segment captured by _VALID_URL.
        numid = re.search(r'documentId=([0-9]+)', url)
        if numid:
            video_id = numid.group(1)
        else:
            m = re.match(self._VALID_URL, url)
            if m is None:
                raise ExtractorError(u'Invalid URL: %s' % url)
            video_id = m.group('video_id')

        # Determine title and media streams from the webpage.
        html = self._download_webpage(url, video_id)

        title_match = re.search(self._TITLE, html)
        if title_match is None:
            raise ExtractorError(u'Unable to extract title')
        title = title_match.group('title')

        streams = [mo.groupdict() for mo in re.finditer(self._MEDIA_STREAM, html)]
        if not streams:
            # NOTE(review): '"fsk"' is presumably an age-restriction marker
            # in the page; only then is the time-of-day message accurate.
            if '"fsk"' in html:
                raise ExtractorError(u'This video is only available after 8:00 pm')
            raise ExtractorError(u'Unable to extract media streams')

        # Choose default media type (0) and highest quality for now.
        default_streams = [s for s in streams if int(s['media_type']) == 0]
        if not default_streams:
            raise ExtractorError(u'No stream with the default media type found')
        stream = max(default_streams, key=lambda s: int(s['quality']))

        # There are two possibilities: RTMP stream or HTTP download.
        # 'ext' is hard-coded to mp4, so both branches verify that the
        # stream really is mp4 before returning it.
        info = {'id': video_id, 'title': title, 'ext': 'mp4'}
        video_url = stream['video_url']
        if stream['rtmp_url']:
            self.to_screen(u'RTMP download detected')
            if not video_url.startswith('mp4:'):
                raise ExtractorError(u'Unexpected RTMP play path: %s' % video_url)
            info['url'] = stream['rtmp_url']
            info['play_path'] = video_url
        else:
            if not video_url.endswith('.mp4'):
                raise ExtractorError(u'Unexpected video URL: %s' % video_url)
            info['url'] = video_url
        return [info]