]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/mdr.py
[smotri] Adapt to new API and modernize
[yt-dlp.git] / youtube_dl / extractor / mdr.py
CommitLineData
e8f2025e
PH
1from __future__ import unicode_literals
2
df1d7da2 3import re
4
5from .common import InfoExtractor
df1d7da2 6
09dacfa5 7
class MDRIE(InfoExtractor):
    """Information extractor for video and audio pages on mdr.de.

    The page HTML supplies the title and the path of an ``-avCustom.xml``
    document; that XML document lists the assets (one per available format).
    """

    _VALID_URL = r'^(?P<domain>https?://(?:www\.)?mdr\.de)/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)(?:_|\.html)'

    # No tests, MDR regularly deletes its videos
    _TEST = {
        'url': 'http://www.mdr.de/fakt/video189002.html',
        'only_matching': True,
    }

    @staticmethod
    def _asset_int(asset, tag, scale=1):
        """Return the integer text of child ``tag`` floor-divided by ``scale``.

        Returns None when the child element is missing, empty, or does not
        hold an integer, so a single incomplete asset cannot abort the whole
        extraction.
        """
        element = asset.find(tag)
        if element is None or not element.text:
            return None
        try:
            return int(element.text) // scale
        except ValueError:
            return None

    def _real_extract(self, url):
        """Download the page at ``url`` and return its info dict.

        The returned dict has the keys ``id``, ``title`` and ``formats``.
        """
        m = re.match(self._VALID_URL, url)
        video_id = m.group('video_id')
        domain = m.group('domain')

        # determine title and media streams from webpage
        html = self._download_webpage(url, video_id)

        title = self._html_search_regex(r'<h[12]>(.*?)</h[12]>', html, 'title')
        # The dot before "xml" is escaped so that it only matches a literal dot.
        xmlurl = self._search_regex(
            r'dataURL:\'(/(?:.+)/(?:video|audio)[0-9]+-avCustom\.xml)', html, 'XML URL')

        doc = self._download_xml(domain + xmlurl, video_id)
        formats = []
        for asset in doc.findall('./assets/asset'):
            url_el = asset.find('.//progressiveDownloadUrl')
            # Skip assets without a usable progressive download URL.
            if url_el is None or not url_el.text:
                continue

            abr = self._asset_int(asset, 'bitrateAudio', 1000)
            media_type_el = asset.find('mediaType')
            media_type = media_type_el.text if media_type_el is not None else None

            fmt = {
                'abr': abr,
                'filesize': self._asset_int(asset, 'fileSize'),
                'url': url_el.text,
            }

            if asset.find('bitrateVideo') is None:
                # No video bitrate element: treat the asset as audio-only.
                fmt['vcodec'] = 'none'
                id_bitrate = abr
            else:
                vbr = self._asset_int(asset, 'bitrateVideo', 1000)
                fmt.update({
                    'vbr': vbr,
                    'width': self._asset_int(asset, 'frameWidth'),
                    'height': self._asset_int(asset, 'frameHeight'),
                })
                id_bitrate = vbr

            # Only build a format id when both of its components are known.
            if media_type is not None and id_bitrate is not None:
                fmt['format_id'] = '%s-%d' % (media_type, id_bitrate)

            formats.append(fmt)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }