]>
Commit | Line | Data |
---|---|---|
add96eb9 | 1 | import urllib.parse |
2 | ||
df1d7da2 | 3 | from .common import InfoExtractor |
2b1b2d83 S |
4 | from ..utils import ( |
5 | determine_ext, | |
6 | int_or_none, | |
34921b43 | 7 | join_nonempty, |
2b1b2d83 S |
8 | parse_duration, |
9 | parse_iso8601, | |
29f7c58a | 10 | url_or_none, |
2b1b2d83 S |
11 | xpath_text, |
12 | ) | |
df1d7da2 | 13 | |
09dacfa5 | 14 | |
class MDRIE(InfoExtractor):
    """Extractor for media pages hosted on mdr.de and kika.de."""

    IE_DESC = 'MDR.DE and KiKA'
    _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'

    _GEO_COUNTRIES = ['DE']

    _TESTS = [{
        # MDR regularly deletes its videos
        'url': 'http://www.mdr.de/fakt/video189002.html',
        'only_matching': True,
    }, {
        # audio
        'url': 'http://www.mdr.de/kultur/audio1312272_zc-15948bad_zs-86171fdd.html',
        'md5': '64c4ee50f0a791deb9479cd7bbe9d2fa',
        'info_dict': {
            'id': '1312272',
            'ext': 'mp3',
            'title': 'Feuilleton vom 30. Oktober 2015',
            'duration': 250,
            'uploader': 'MITTELDEUTSCHER RUNDFUNK',
        },
        'skip': '404 not found',
    }, {
        'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
        'md5': '4930515e36b06c111213e80d1e4aad0e',
        'info_dict': {
            'id': '19636',
            'ext': 'mp4',
            'title': 'Baumhaus vom 30. Oktober 2015',
            'duration': 134,
            'uploader': 'KIKA',
        },
        'skip': '404 not found',
    }, {
        'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
        'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
        'info_dict': {
            'id': '8182',
            'ext': 'mp4',
            'title': 'Beutolomäus und der geheime Weihnachtswunsch',
            'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
            'timestamp': 1482541200,
            'upload_date': '20161224',
            'duration': 4628,
            'uploader': 'KIKA',
        },
    }, {
        # audio with alternative playerURL pattern
        'url': 'http://www.mdr.de/kultur/videos-und-audios/audio-radio/operation-mindfuck-robert-wilson100.html',
        'info_dict': {
            'id': '100',
            'ext': 'mp4',
            'title': 'Feature: Operation Mindfuck - Robert Anton Wilson',
            'duration': 3239,
            'uploader': 'MITTELDEUTSCHER RUNDFUNK',
        },
    }, {
        # empty bitrateVideo and bitrateAudio
        'url': 'https://www.kika.de/filme/sendung128372_zc-572e3f45_zs-1d9fb70e.html',
        'info_dict': {
            'id': '128372',
            'ext': 'mp4',
            'title': 'Der kleine Wichtel kehrt zurück',
            'description': 'md5:f77fafdff90f7aa1e9dca14f662c052a',
            'duration': 4876,
            'timestamp': 1607823300,
            'upload_date': '20201213',
            'uploader': 'ZDF',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
        'only_matching': True,
    }, {
        'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
        'only_matching': True,
    }, {
        'url': 'http://www.mdr.de/mediathek/mdr-videos/a/video-1334.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the media page at *url*.

        The page is downloaded and searched for a reference to an
        ``-avCustom.xml`` document. That XML is fetched (resolved relative
        to *url*) and its ``./assets/asset`` elements are turned into
        formats; title, description, timestamp, duration and uploader are
        read from the same document.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``timestamp``, ``duration``, ``uploader`` and ``formats``.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        xml_ref = self._search_regex(
            r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>.+?-avCustom\.xml)\1',
            page, 'data url', group='url')
        # The reference may contain `\/` sequences; turn them into plain `/`.
        xml_ref = xml_ref.replace(r'\/', '/')

        doc = self._download_xml(urllib.parse.urljoin(url, xml_ref), video_id)

        # The title is mandatory; either element may supply it.
        title = xpath_text(doc, ['./title', './broadcast/broadcastName'], 'title', fatal=True)

        is_audio = xpath_text(doc, './type', default=None) == 'audio'

        # Each asset may expose its media under several `<kind>Url` elements.
        source_kinds = (
            'download',
            'progressiveDownload',
            'dynamicHttpStreamingRedirector',
            'adaptiveHttpStreamingRedirector',
        )

        formats = []
        seen_urls = set()
        for asset in doc.findall('./assets/asset'):
            for kind in source_kinds:
                node = asset.find(f'./{kind}Url')
                if node is None:
                    continue

                media_url = url_or_none(node.text)
                # Skip unusable URLs and ones already handled for another asset/kind.
                if not media_url or media_url in seen_urls:
                    continue
                seen_urls.add(media_url)

                ext = determine_ext(media_url)

                if ext == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        media_url, video_id, 'mp4', entry_protocol='m3u8_native',
                        quality=1, m3u8_id='HLS', fatal=False))
                    continue

                if ext == 'f4m':
                    formats.extend(self._extract_f4m_formats(
                        media_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
                        quality=1, f4m_id='HDS', fatal=False))
                    continue

                # Anything else is treated as a single direct format that is
                # described by the asset's own metadata elements.
                # NOTE(review): the positional 1000 is presumably a scale
                # divisor for the bitrate - confirm against utils.int_or_none.
                vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
                abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)

                fmt = {
                    'url': media_url,
                    'format_id': join_nonempty(
                        xpath_text(asset, './mediaType', 'media type', default='MP4'),
                        vbr or abr),
                    'filesize': int_or_none(xpath_text(asset, './fileSize', 'file size')),
                    'abr': abr,
                    'vbr': vbr,
                }

                # Frame dimensions are only read when a video bitrate is present.
                if vbr:
                    fmt['width'] = int_or_none(xpath_text(asset, './frameWidth', 'width'))
                    fmt['height'] = int_or_none(xpath_text(asset, './frameHeight', 'height'))

                if is_audio:
                    fmt['vcodec'] = 'none'

                formats.append(fmt)

        # First available of the broadcast date elements, in this order.
        broadcast_date = xpath_text(
            doc, [
                './broadcast/broadcastDate',
                './broadcast/broadcastStartDate',
                './broadcast/broadcastEndDate',
            ],
            'timestamp', default=None)

        return {
            'id': video_id,
            'title': title,
            'description': xpath_text(doc, './broadcast/broadcastDescription', 'description'),
            'timestamp': parse_iso8601(broadcast_date),
            'duration': parse_duration(xpath_text(doc, './duration', 'duration')),
            'uploader': xpath_text(doc, './rights', 'uploader'),
            'formats': formats,
        }