]>
Commit | Line | Data |
---|---|---|
e9ea0bf1 S |
1 | # encoding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
c1ed1f70 S |
7 | from ..utils import ( |
8 | ExtractorError, | |
9 | int_or_none, | |
77541837 | 10 | qualities, |
c1ed1f70 | 11 | ) |
e9ea0bf1 S |
12 | |
13 | ||
class NDRIE(InfoExtractor):
    """Information extractor for media pages hosted on www.ndr.de.

    The page is scraped for an inline mp3 source and/or an mp4 source;
    the mp4 source is expanded into its 'lo', 'hi' and 'hq' variants.
    """

    IE_NAME = 'ndr'
    IE_DESC = 'NDR.de - Mediathek'
    _VALID_URL = r'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html'

    _TESTS = [{
        'url': 'http://www.ndr.de/fernsehen/sendungen/nordmagazin/Kartoffeltage-in-der-Lewitz,nordmagazin25866.html',
        'md5': '5bc5f5b92c82c0f8b26cddca34f8bb2c',
        'note': 'Video file',
        'info_dict': {
            'id': '25866',
            'ext': 'mp4',
            'title': 'Kartoffeltage in der Lewitz',
            'description': 'md5:48c4c04dde604c8a9971b3d4e3b9eaa8',
            'duration': 166,
        },
    }, {
        'url': 'http://www.ndr.de/info/audio51535.html',
        'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
        'note': 'Audio file',
        'info_dict': {
            'id': '51535',
            'ext': 'mp3',
            'title': 'La Valette entgeht der Hinrichtung',
            'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
            'duration': 884,
        },
    }]

    def _real_extract(self, url):
        """Scrape the page at *url* and return its media info dict.

        The returned dict carries the keys 'id', 'title', 'description',
        'thumbnail', 'duration' and 'formats'.  'thumbnail' stays None
        unless an mp4 source and at least one thumbnail entry are found.

        Raises ExtractorError when the page exposes neither an mp3 nor
        an mp4 source.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id, 'Downloading page')

        title = self._og_search_title(webpage).strip()
        description = self._og_search_description(webpage)
        # Only strip when there is something to strip; empty/None passes through.
        description = description.strip() if description else description

        # The duration lookup is non-fatal, so int_or_none gets whatever
        # the search returned.
        raw_duration = self._html_search_regex(
            r'duration: (\d+),\n', webpage, 'duration', fatal=False)
        duration = int_or_none(raw_duration)

        formats = []
        thumbnail = None

        # Audio source: a single mp3 entry, listed before any video formats.
        audio_match = re.search(
            r'''\{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', webpage)
        if audio_match is not None:
            formats.append({
                'url': audio_match.group('audio'),
                'format_id': 'mp3',
            })

        # Video source: the matched URL minus its quality suffix serves as
        # the base from which all three quality variants are rebuilt.
        video_match = re.search(
            r'''3: \{src:'(?P<video>.+?)\.(lo|hi|hq)\.mp4', type:"video/mp4"},''',
            webpage)
        if video_match is not None:
            candidates = re.findall(
                r'''\d+: \{src: "([^"]+)"(?: \|\| '[^']+')?, quality: '([^']+)'}''',
                webpage)
            if candidates:
                # Each candidate is a (path, quality label) pair; keep the
                # one whose label ranks highest in the given label list.
                rank = qualities(['xs', 's', 'm', 'l', 'xl'])
                best_path, _ = max(candidates, key=lambda item: rank(item[1]))
                thumbnail = 'http://www.ndr.de' + best_path

            base_url = video_match.group('video')
            formats.extend(
                {
                    'url': '%s.%s.mp4' % (base_url, suffix),
                    'format_id': suffix,
                }
                for suffix in ('lo', 'hi', 'hq')
            )

        if not formats:
            raise ExtractorError('No media links available for %s' % video_id)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }