]>
Commit | Line | Data |
---|---|---|
a2e6db36 PH |
1 | # coding: utf-8 |
2 | ||
d5822b96 PH |
3 | import re |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
61224dbc | 7 | int_or_none, |
02dbf93f | 8 | unified_strdate, |
d5822b96 PH |
9 | ) |
10 | ||
0b7c2485 | 11 | |
class ZDFIE(InfoExtractor):
    """Extractor for video pages of the ZDF Mediathek (www.zdf.de).

    The numeric video id is taken from the page URL and used to query the
    ``beitragsDetails`` XML service, from which the metadata and the list of
    available formats are read.
    """

    _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<video_id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'

    _TEST = {
        u"url": u"http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt",
        u"file": u"2037704.webm",
        u"info_dict": {
            u"upload_date": u"20131127",
            u"description": u"Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial \"Ende des Machtpokers - Große Koalition für Deutschland\".",
            u"uploader": u"spezial",
            u"title": u"ZDFspezial - Ende des Machtpokers"
        },
        u"skip": u"Videos on ZDF.de are depublicised in short order",
    }

    # HH:MM:SS with an optional fractional part.  The whole "\.digits" group
    # is optional; previously only the digits were, so a value without a
    # trailing dot never matched and the duration was silently dropped.
    _DURATION_RE = r'''(?x)^
        (?P<hours>[0-9]{2})
        :(?P<minutes>[0-9]{2})
        :(?P<seconds>[0-9]{2})
        (?:\.(?P<ms>[0-9]+))?
    '''

    # The ``basetype`` attribute of a format node: six underscore-separated
    # fields, of which codecs, container and protocol are used below.
    _BASETYPE_RE = r'''(?x)
        (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
        (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
    '''

    @staticmethod
    def _child_text(node, path):
        """Return the text of the first element matching *path* below *node*.

        Returns None when no such element exists, so optional fields of the
        XML document do not abort the extraction.
        """
        child = node.find(path)
        return None if child is None else child.text

    @classmethod
    def _duration_seconds(cls, duration_str):
        """Convert an ``HH:MM:SS[.fraction]`` string to whole seconds.

        Returns None when *duration_str* is empty/None or does not start
        with that pattern. The fractional part is ignored.
        """
        if not duration_str:
            return None
        duration_m = re.match(cls._DURATION_RE, duration_str)
        if duration_m is None:
            return None
        return (
            int(duration_m.group('hours')) * 60 * 60 +
            int(duration_m.group('minutes')) * 60 +
            int(duration_m.group('seconds'))
        )

    @staticmethod
    def _per_thousand(text):
        """Return ``int(text) // 1000``, or None for empty/None *text*.

        Used for the bitrate fields (presumably bit/s in the feed, reported
        as kbit/s -- TODO confirm the source unit).
        """
        if not text:
            return None
        return int(text) // 1000

    def _format_from_node(self, fnode):
        """Build a format dict from one ``formitaet`` element.

        Returns None when the node has no URL or when its ``basetype``
        attribute is missing or does not have the expected six-field shape,
        so that one odd entry does not discard all other formats.
        The ``_available`` key is False for URLs that point at the
        metafilegenerator service.
        """
        video_url = self._child_text(fnode, 'url')
        if not video_url:
            return None

        basetype = fnode.attrib.get('basetype')
        format_m = re.match(self._BASETYPE_RE, basetype) if basetype else None
        if format_m is None:
            return None

        quality = self._child_text(fnode, './quality')
        # Without a quality label the basetype alone identifies the format.
        format_id = basetype + u'-' + quality if quality else basetype

        return {
            'format_id': format_id,
            'url': video_url,
            'ext': format_m.group('container'),
            'acodec': format_m.group('acodec'),
            'vcodec': format_m.group('vcodec'),
            'abr': self._per_thousand(
                self._child_text(fnode, './audioBitrate')),
            'vbr': self._per_thousand(
                self._child_text(fnode, './videoBitrate')),
            # "or None" maps an empty element to None instead of int('').
            'width': int_or_none(
                self._child_text(fnode, './width') or None),
            'height': int_or_none(
                self._child_text(fnode, './height') or None),
            'filesize': int_or_none(
                self._child_text(fnode, './filesize') or None),
            'format_note': None,
            'protocol': format_m.group('proto').lower(),
            '_available': u'http://www.metafilegenerator' not in video_url,
        }

    def _real_extract(self, url):
        """Download the XML description of the video at *url* and return its
        info dict (id, title, formats, description, uploader, duration,
        upload_date).
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id')

        xml_url = u'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
        doc = self._download_xml(
            xml_url, video_id,
            note=u'Downloading video info',
            errnote=u'Failed to download video info')

        # The title is mandatory, so a missing node is still an error here.
        title = doc.find('.//information/title').text
        description = self._child_text(doc, './/information/detail')
        uploader = self._child_text(doc, './/details/originChannelTitle')
        duration = self._duration_seconds(
            self._child_text(doc, './/details/length'))
        airtime = self._child_text(doc, './/details/airtime')
        upload_date = unified_strdate(airtime) if airtime else None

        formats = []
        for fnode in doc.findall('.//formitaeten/formitaet'):
            fmt = self._format_from_node(fnode)
            if fmt is not None and fmt['_available']:
                formats.append(fmt)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'uploader': uploader,
            'duration': duration,
            'upload_date': upload_date,
        }