]>
Commit | Line | Data |
---|---|---|
e9ea0bf1 S |
1 | # encoding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..utils import ExtractorError | |
8 | ||
9 | ||
class NDRIE(InfoExtractor):
    """Information extractor for media pages on www.ndr.de (NDR Mediathek).

    A page may expose an MP3 audio source, a set of MP4 video sources, or
    both; every source that is found is reported as a separate format.
    """

    IE_NAME = 'ndr'
    IE_DESC = 'NDR.de - Mediathek'
    # The numeric id is the run of digits directly before the ".html" suffix.
    _VALID_URL = r'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html'

    _TESTS = [
        {
            'url': 'http://www.ndr.de/fernsehen/sendungen/markt/markt7959.html',
            'md5': 'e7a6079ca39d3568f4996cb858dd6708',
            'note': 'Video file',
            'info_dict': {
                'id': '7959',
                'ext': 'mp4',
                'title': 'Markt - die ganze Sendung',
                'description': 'md5:af9179cf07f67c5c12dc6d9997e05725',
                'duration': 2655,
            },
        },
        {
            'url': 'http://www.ndr.de/info/audio51535.html',
            'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
            'note': 'Audio file',
            'info_dict': {
                'id': '51535',
                'ext': 'mp3',
                'title': 'La Valette entgeht der Hinrichtung',
                'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
                'duration': 884,
            }
        }
    ]

    def _real_extract(self, url):
        """Download the page at *url* and build its info dictionary.

        Returns a dict with the keys 'id', 'title', 'description',
        'thumbnail', 'duration' and 'formats'.  'duration' is None when the
        page has no duration markup; 'thumbnail' is None when no video source
        (or no poster entry) is found.

        Raises ExtractorError when neither an audio nor a video source can be
        located in the page.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage(url, video_id, 'Downloading page')

        title = self._og_search_title(page)
        description = self._og_search_description(page)

        # Duration is rendered as separate minute and second spans.
        mobj = re.search(
            r'<div class="duration"><span class="min">(?P<minutes>\d+)</span>:<span class="sec">(?P<seconds>\d+)</span></div>',
            page)
        duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None

        formats = []

        # Raw strings keep the regex escapes out of Python's own string
        # escape processing (a bare "\." in a normal literal is an invalid
        # escape sequence and triggers a warning on modern Python).
        mp3_url = re.search(r'''{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page)
        if mp3_url:
            formats.append({
                'url': mp3_url.group('audio'),
                'format_id': 'mp3',
            })

        thumbnail = None

        # Only the ".hi.mp4" entry is matched; the URLs of the other
        # qualities are derived from it by swapping the quality suffix.
        video_url = re.search(r'''3: {src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
        if video_url:
            thumbnail = self._html_search_regex(
                r'(?m)title: "NDR PLAYER",\s*poster: "([^"]+)",',
                page, 'thumbnail', fatal=False)
            if thumbnail:
                # The poster value is prefixed with the site root to form
                # the full thumbnail URL.
                thumbnail = 'http://www.ndr.de' + thumbnail
            for format_id in ['lo', 'hi', 'hq']:
                formats.append({
                    'url': '%s.%s.mp4' % (video_url.group('video'), format_id),
                    'format_id': format_id,
                })

        if not formats:
            raise ExtractorError('No media links available for %s' % video_id)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }