]>
Commit | Line | Data |
---|---|---|
2b1b2d83 | 1 | # coding: utf-8 |
e8f2025e PH |
2 | from __future__ import unicode_literals |
3 | ||
df1d7da2 | 4 | from .common import InfoExtractor |
2b1b2d83 S |
5 | from ..compat import compat_urlparse |
6 | from ..utils import ( | |
7 | determine_ext, | |
8 | int_or_none, | |
9 | parse_duration, | |
10 | parse_iso8601, | |
11 | xpath_text, | |
12 | ) | |
df1d7da2 | 13 | |
09dacfa5 | 14 | |
class MDRIE(InfoExtractor):
    """Extractor for media pages on mdr.de and kika.de.

    The page is expected to reference an ``*-avCustom.xml`` document through
    a ``dataURL`` JavaScript property. That XML document carries the title,
    broadcast metadata and a list of assets, each of which may expose a
    progressive download URL and/or HDS (f4m) / HLS (m3u8) manifest URLs.
    """
    IE_DESC = 'MDR.DE and KiKA'
    _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z]+(?P<id>\d+)(?:_.+?)?\.html'

    _TESTS = [{
        # MDR regularly deletes its videos
        'url': 'http://www.mdr.de/fakt/video189002.html',
        'only_matching': True,
    }, {
        'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
        'md5': '4930515e36b06c111213e80d1e4aad0e',
        'info_dict': {
            'id': '19636',
            'ext': 'mp4',
            'title': 'Baumhaus vom 30. Oktober 2015',
            'duration': 134,
            'uploader': 'KIKA',
        },
    }, {
        'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
        'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
        'info_dict': {
            'id': '8182',
            'ext': 'mp4',
            'title': 'Beutolomäus und der geheime Weihnachtswunsch',
            'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
            'timestamp': 1419047100,
            'upload_date': '20141220',
            'duration': 4628,
            'uploader': 'KIKA',
        },
    }, {
        'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
        'only_matching': True,
    }, {
        'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the media page at *url*.

        Downloads the page, locates the ``dataURL`` of the ``avCustom.xml``
        descriptor, downloads that XML and turns every asset URL found in it
        into one or more format entries.

        Returns a dict with ``id``, ``title``, ``description``, ``timestamp``,
        ``duration``, ``uploader`` and ``formats``.

        NOTE(review): failures to download the page/XML, to find the data URL
        or to find a title are presumably reported by the helpers as an
        extractor error - confirm against the base class.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        data_url = self._search_regex(
            r'dataURL\s*:\s*(["\'])(?P<url>/.+/(?:video|audio)[0-9]+-avCustom\.xml)\1',
            webpage, 'data url', group='url')

        # data_url is site-relative, so resolve it against the page URL.
        doc = self._download_xml(
            compat_urlparse.urljoin(url, data_url), video_id)

        # A title is mandatory for the result, so the fallback lookup is
        # fatal instead of silently yielding None.
        title = (xpath_text(doc, './title', 'title', default=None) or
                 xpath_text(doc, './broadcast/broadcastName', 'title', fatal=True))

        formats = []
        # The same URL may be listed by several assets/sources; handle each
        # one only once.
        processed_urls = set()
        for asset in doc.findall('./assets/asset'):
            for source in (
                    'progressiveDownload',
                    'dynamicHttpStreamingRedirector',
                    'adaptiveHttpStreamingRedirector'):
                url_el = asset.find('./%sUrl' % source)
                if url_el is None:
                    continue

                video_url = url_el.text
                # An empty element has text None; there is nothing to
                # download from it.
                if not video_url or video_url in processed_urls:
                    continue

                processed_urls.add(video_url)

                # Bitrates are divided by 1000 (presumably bit/s -> kbit/s).
                vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
                abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)

                ext = determine_ext(video_url)
                if ext == 'm3u8':
                    url_formats = self._extract_m3u8_formats(
                        video_url, video_id, 'mp4', entry_protocol='m3u8_native',
                        preference=0, m3u8_id='HLS', fatal=False)
                elif ext == 'f4m':
                    url_formats = self._extract_f4m_formats(
                        video_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
                        preference=0, f4m_id='HDS', fatal=False)
                else:
                    # Anything else is treated as a direct (progressive) file.
                    media_type = xpath_text(asset, './mediaType', 'media type', default='MP4')
                    filesize = int_or_none(xpath_text(asset, './fileSize', 'file size'))

                    # Only append the bitrate when one is known; '%d' would
                    # raise TypeError on None.
                    bitrate = vbr or abr
                    format_id = (
                        '%s-%d' % (media_type, bitrate)
                        if bitrate is not None else media_type)

                    f = {
                        'url': video_url,
                        'format_id': format_id,
                        'filesize': filesize,
                        'abr': abr,
                        'preference': 1,
                    }

                    if vbr:
                        width = int_or_none(xpath_text(asset, './frameWidth', 'width'))
                        height = int_or_none(xpath_text(asset, './frameHeight', 'height'))
                        f.update({
                            'vbr': vbr,
                            'width': width,
                            'height': height,
                        })

                    url_formats = [f]

                # The manifest helpers are non-fatal here and may hand back
                # an empty/falsy result on failure; skip instead of iterating.
                if not url_formats:
                    continue

                if not vbr:
                    # No video bitrate: treat the asset as audio-only and
                    # report any total bitrate as the audio bitrate.
                    for f in url_formats:
                        abr = f.get('tbr') or abr
                        if 'tbr' in f:
                            del f['tbr']
                        f.update({
                            'abr': abr,
                            'vcodec': 'none',
                        })

                formats.extend(url_formats)

        self._sort_formats(formats)

        description = xpath_text(doc, './broadcast/broadcastDescription', 'description')
        # Prefer broadcastDate, fall back to broadcastStartDate.
        timestamp = parse_iso8601(
            xpath_text(doc, './broadcast/broadcastDate', 'timestamp', default=None) or
            xpath_text(doc, './broadcast/broadcastStartDate', 'timestamp', default=None))
        duration = parse_duration(xpath_text(doc, './duration', 'duration'))
        uploader = xpath_text(doc, './rights', 'uploader')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'duration': duration,
            'uploader': uploader,
            'formats': formats,
        }