]>
Commit | Line | Data |
---|---|---|
73e79f2a PH |
1 | # coding: utf-8 |
2 | ||
3 | import re | |
4 | import xml.etree.ElementTree | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..utils import ( | |
8 | determine_ext, | |
73e79f2a PH |
9 | unified_strdate, |
10 | ) | |
11 | ||
12 | ||
class DreiSatIE(InfoExtractor):
    """Extractor for single videos of the 3sat Mediathek (3sat.de).

    The numeric ``obj`` query parameter of the page URL identifies the video;
    its metadata and the list of available formats are read from the
    ``beitragsDetails`` XML service.
    """

    IE_NAME = '3sat'
    # The dots of the host name and of "index.php" are escaped so that they
    # match a literal '.' only, not an arbitrary character.
    _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/index\.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
    _TEST = {
        u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
        u'file': u'36983.webm',
        u'md5': u'57c97d0469d71cf874f6815aa2b7c944',
        u'info_dict': {
            u"title": u"Kaffeeland Schweiz",
            u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...",
            u"uploader": u"3sat",
            u"upload_date": u"20130622"
        }
    }

    def _real_extract(self, url):
        """Build the info dictionary for the video referenced by ``url``.

        Downloads the XML details document for the video id found in ``url``
        and collects title, description, uploader, upload date, thumbnails
        and formats from it. The formats are sorted ascending by quality and
        the fields of the last (best) one are merged into the result.

        Raises IndexError when the details document contains no usable
        format; elements that are missing from the XML surface as
        AttributeError, as before.
        """
        def int_or_none(text):
            # Thumbnails are optional metadata: a malformed size key must not
            # abort the whole extraction, so fall back to None.
            try:
                return int(text)
            except (TypeError, ValueError):
                return None

        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
        details_xml = self._download_webpage(details_url, video_id, note=u'Downloading video details')
        details_doc = xml.etree.ElementTree.fromstring(details_xml.encode('utf-8'))

        # The "key" attribute is split at 'x' into width and height
        # (presumably "<width>x<height>" -- confirm against the service).
        thumbnails = []
        for te in details_doc.findall('.//teaserimage'):
            width, _, height = te.attrib['key'].partition('x')
            thumbnails.append({
                'width': int_or_none(width),
                'height': int_or_none(height),
                'url': te.text,
            })

        information_el = details_doc.find('.//information')
        video_title = information_el.find('./title').text
        video_description = information_el.find('./detail').text

        details_el = details_doc.find('.//details')
        video_uploader = details_el.find('./channel').text
        upload_date = unified_strdate(details_el.find('./airtime').text)

        formats = []
        for fe in details_doc.findall('.//formitaet'):
            format_url = fe.find('./url').text
            # Entries served through metafilegenerator.de are left out
            # (presumably not direct media URLs -- confirm).
            if format_url.startswith('http://www.metafilegenerator.de/'):
                continue
            formats.append({
                'format_id': fe.attrib['basetype'],
                'width': int(fe.find('./width').text),
                'height': int(fe.find('./height').text),
                'url': format_url,
                'ext': determine_ext(format_url),
                'filesize': int(fe.find('./filesize').text),
                'video_bitrate': int(fe.find('./videoBitrate').text),
                '3sat_qualityname': fe.find('./quality').text,
            })

        # Quality labels in ascending order; the sort below puts the
        # preferred format last.
        quality_order = ['low', 'med', 'high', 'veryhigh']

        def _sortkey(fmt):
            quality = fmt['3sat_qualityname']
            # An unknown label ranks below every known one instead of
            # aborting the extraction with ValueError.
            qidx = quality_order.index(quality) if quality in quality_order else -1
            # NOTE(review): within one quality level rtmp URLs sort last and
            # therefore win; the original local was called "prefer_http" --
            # confirm the intended direction.
            is_rtmp = 1 if 'rtmp' in fmt['url'] else 0
            return (qidx, is_rtmp, fmt['video_bitrate'])
        formats.sort(key=_sortkey)

        info = {
            '_type': 'video',
            'id': video_id,
            'title': video_title,
            'formats': formats,
            'description': video_description,
            'thumbnails': thumbnails,
            # Videos without any teaser image simply get no thumbnail.
            'thumbnail': thumbnails[-1]['url'] if thumbnails else None,
            'uploader': video_uploader,
            'upload_date': upload_date,
        }

        # TODO: Remove when #980 has been merged
        # NOTE(review): still raises IndexError when no usable format was
        # found; consider reporting this with the project's extractor error.
        info.update(formats[-1])

        return info