]>
Commit | Line | Data |
---|---|---|
a2e6db36 | 1 | # coding: utf-8 |
919052d0 | 2 | from __future__ import unicode_literals |
a2e6db36 | 3 | |
d5822b96 PH |
4 | import re |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..utils import ( | |
61224dbc | 8 | int_or_none, |
02dbf93f | 9 | unified_strdate, |
d5822b96 PH |
10 | ) |
11 | ||
0b7c2485 | 12 | |
class ZDFIE(InfoExtractor):
    """Extractor for single videos of the ZDF Mediathek.

    The video id is taken from the page URL and used to query the
    ``beitragsDetails`` XML service, from which the metadata and the list
    of available formats are read.
    """

    _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<video_id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'

    _TEST = {
        'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt',
        'info_dict': {
            'id': '2037704',
            'ext': 'webm',
            'title': 'ZDFspezial - Ende des Machtpokers',
            'description': 'Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial "Ende des Machtpokers - Große Koalition für Deutschland".',
            'duration': 1022,
            'uploader': 'spezial',
            'uploader_id': '225948',
            'upload_date': '20131127',
        },
        'skip': 'Videos on ZDF.de are depublicised in short order',
    }

    # The ``basetype`` attribute of a <formitaet> node packs six
    # underscore-separated fields; only codecs, container and protocol are
    # used below.
    _BASETYPE_RE = r'''(?x)
        (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
        (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
    '''

    @staticmethod
    def _zdf_text(parent, path):
        """Return the text of the first node matching *path*, or None.

        ``Element.find`` yields None for a missing node, so going through
        this helper keeps optional fields from raising AttributeError.
        """
        node = parent.find(path)
        return None if node is None else node.text

    @staticmethod
    def _zdf_kbit(raw):
        """Return *raw* converted to int and divided by 1000, or None.

        NOTE(review): the feed presumably reports bitrates in bit/s while
        the format dict expects kbit/s - confirm against the service.
        """
        value = int_or_none(raw)
        return None if value is None else value // 1000

    def _zdf_format(self, fnode):
        """Build a format dict from a <formitaet> node.

        Returns None when the node cannot be used: it has no URL, its URL
        points at the metafile generator, or its ``basetype`` attribute
        does not have the expected six-field layout.
        """
        video_url = self._zdf_text(fnode, 'url')
        if not video_url or 'http://www.metafilegenerator' in video_url:
            return None

        format_id = fnode.attrib.get('basetype') or ''
        format_m = re.match(self._BASETYPE_RE, format_id)
        if format_m is None:
            # Unknown basetype layout: skip this entry instead of aborting
            # the extraction of every other format.
            return None

        quality = self._zdf_text(fnode, './quality')

        return {
            'format_id': (format_id + '-' + quality) if quality else format_id,
            'url': video_url,
            'ext': format_m.group('container'),
            'acodec': format_m.group('acodec'),
            'vcodec': format_m.group('vcodec'),
            'abr': self._zdf_kbit(self._zdf_text(fnode, './audioBitrate')),
            'vbr': self._zdf_kbit(self._zdf_text(fnode, './videoBitrate')),
            'width': int_or_none(self._zdf_text(fnode, './width')),
            'height': int_or_none(self._zdf_text(fnode, './height')),
            'filesize': int_or_none(self._zdf_text(fnode, './filesize')),
            'format_note': None,
            'protocol': format_m.group('proto').lower(),
        }

    def _real_extract(self, url):
        """Download the video description XML and return the info dict.

        The title is treated as mandatory (a missing title node still
        raises); every other metadata field falls back to None when its
        node is absent.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id')

        xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
        doc = self._download_xml(
            xml_url, video_id,
            note='Downloading video info',
            errnote='Failed to download video info')

        title = doc.find('.//information/title').text
        description = self._zdf_text(doc, './/information/detail')
        duration = int_or_none(self._zdf_text(doc, './/details/lengthSec'))
        uploader = self._zdf_text(doc, './/details/originChannelTitle')
        uploader_id = self._zdf_text(doc, './/details/originChannelId')
        airtime = self._zdf_text(doc, './/details/airtime')
        upload_date = None if airtime is None else unified_strdate(airtime)

        candidates = (
            self._zdf_format(fnode)
            for fnode in doc.findall('.//formitaeten/formitaet'))
        formats = [fmt for fmt in candidates if fmt is not None]

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'upload_date': upload_date,
            'formats': formats,
        }