]>
Commit | Line | Data |
---|---|---|
1 | # coding: utf-8 | |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import functools | |
5 | import re | |
6 | ||
7 | from .common import InfoExtractor | |
8 | from ..utils import ( | |
9 | int_or_none, | |
10 | unified_strdate, | |
11 | OnDemandPagedList, | |
12 | xpath_text, | |
13 | ) | |
14 | ||
15 | ||
def extract_from_xml_url(ie, video_id, xml_url):
    """Download a ZDF video description XML and turn it into an info dict.

    ie is the extractor whose ``_download_xml`` and ``_sort_formats`` are
    used, video_id is used for the download call and as the ``id`` of the
    result, and xml_url is the address of the XML document.

    Returns a dict with the keys id, title, description, duration,
    thumbnails, uploader, uploader_id, upload_date and formats.
    """
    doc = ie._download_xml(
        xml_url, video_id,
        note='Downloading video info',
        errnote='Failed to download video info')

    title = doc.find('.//information/title').text
    description = xpath_text(doc, './/information/detail', 'description')
    duration = int_or_none(xpath_text(doc, './/details/lengthSec', 'duration'))
    uploader = xpath_text(doc, './/details/originChannelTitle', 'uploader')
    uploader_id = xpath_text(doc, './/details/originChannelId', 'uploader id')
    upload_date = unified_strdate(xpath_text(doc, './/details/airtime', 'upload date'))

    def xml_to_format(fnode):
        # Build one format dict from a <formitaet> node.  Returns None when
        # the node has no URL or its basetype cannot be parsed, so that a
        # single malformed node does not abort the whole extraction.
        video_url = xpath_text(fnode, './url', 'url')
        if not video_url:
            return None
        # URLs pointing at the metafilegenerator are flagged here and
        # dropped when the format list is assembled below.
        is_available = 'http://www.metafilegenerator' not in video_url

        basetype = fnode.attrib.get('basetype')
        if not basetype:
            return None
        # basetype is expected to consist of six underscore-separated parts:
        # vcodec, acodec, container, proto, index and indexproto.
        format_m = re.match(r'''(?x)
            (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
            (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
        ''', basetype)
        if format_m is None:
            return None

        # The quality element is optional; only append it when present.
        quality = xpath_text(fnode, './quality', 'quality')
        format_id = basetype + '-' + quality if quality else basetype

        return {
            'format_id': format_id,
            'url': video_url,
            'ext': format_m.group('container'),
            'acodec': format_m.group('acodec'),
            'vcodec': format_m.group('vcodec'),
            'abr': int_or_none(xpath_text(fnode, './audioBitrate', 'abr'), 1000),
            'vbr': int_or_none(xpath_text(fnode, './videoBitrate', 'vbr'), 1000),
            'width': int_or_none(xpath_text(fnode, './width', 'width')),
            'height': int_or_none(xpath_text(fnode, './height', 'height')),
            'filesize': int_or_none(xpath_text(fnode, './filesize', 'filesize')),
            'format_note': None,
            'protocol': format_m.group('proto').lower(),
            '_available': is_available,
        }

    def xml_to_thumbnails(fnode):
        # Convert teaser image nodes into thumbnail dicts; nodes without
        # text are skipped.
        thumbnails = []
        for node in fnode:
            thumbnail_url = node.text
            if not thumbnail_url:
                continue
            thumbnail = {
                'url': thumbnail_url,
            }
            # A key attribute of the form "<width>x<height>" carries the
            # image dimensions; anything else is ignored.
            m = re.match(r'^([0-9]+)x([0-9]+)$', node.attrib.get('key', ''))
            if m:
                thumbnail['width'] = int(m.group(1))
                thumbnail['height'] = int(m.group(2))
            thumbnails.append(thumbnail)
        return thumbnails

    thumbnails = xml_to_thumbnails(doc.findall('.//teaserimages/teaserimage'))

    formats = []
    for format_node in doc.findall('.//formitaeten/formitaet'):
        fmt = xml_to_format(format_node)
        if fmt is not None and fmt['_available']:
            formats.append(fmt)
    ie._sort_formats(formats)

    return {
        'id': video_id,
        'title': title,
        'description': description,
        'duration': duration,
        'thumbnails': thumbnails,
        'uploader': uploader,
        'uploader_id': uploader_id,
        'upload_date': upload_date,
        'formats': formats,
    }
107 | ||
108 | ||
class ZDFIE(InfoExtractor):
    """Extractor for a single ZDF Mediathek video.

    Accepts ``zdf:<id>`` and ``zdf:video:<id>`` pseudo URLs as well as
    www.zdf.de/ZDFmediathek "beitrag" page URLs.
    """

    _VALID_URL = r'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'

    _TEST = {
        'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt',
        'info_dict': {
            'id': '2037704',
            'ext': 'webm',
            'title': 'ZDFspezial - Ende des Machtpokers',
            'description': 'Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial "Ende des Machtpokers - Große Koalition für Deutschland".',
            'duration': 1022,
            'uploader': 'spezial',
            'uploader_id': '225948',
            'upload_date': '20131127',
        },
        'skip': 'Videos on ZDF.de are depublicised in short order',
    }

    def _real_extract(self, url):
        """Extract the video referenced by url through the beitragsDetails XML service."""
        beitrag_id = self._match_id(url)
        details_url = (
            'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s'
            % beitrag_id)
        return extract_from_xml_url(self, beitrag_id, details_url)
131 | ||
132 | ||
class ZDFChannelIE(InfoExtractor):
    """Extractor for ZDF Mediathek channel overviews.

    Accepts ``zdf:topic:<id>`` pseudo URLs and "kanaluebersicht" page URLs
    and returns a lazily paged playlist of the channel's teasers.
    """

    _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/)(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic',
        'info_dict': {
            'id': '1586442',
        },
        'playlist_count': 3,
    }
    # Number of teasers requested per call to the XML service.
    _PAGE_SIZE = 50

    def _fetch_page(self, channel_id, page):
        """Yield a url entry for every video or topic teaser on one page.

        page is the zero-based page index; it is converted into the offset
        parameter of the "aktuellste" XML service.
        """
        offset = page * self._PAGE_SIZE
        xml_url = (
            'http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset=%d&maxLength=%d&id=%s'
            % (offset, self._PAGE_SIZE, channel_id))
        doc = self._download_xml(
            xml_url, channel_id,
            note='Downloading channel info',
            errnote='Failed to download channel info')

        # Playlist metadata is optional: a missing element yields None
        # instead of aborting the page with an AttributeError.
        title = xpath_text(doc, './/information/title', 'title')
        description = xpath_text(doc, './/information/detail', 'description')
        for asset in doc.findall('.//teasers/teaser'):
            a_type = xpath_text(asset, './type', 'type')
            if a_type not in ('video', 'topic'):
                continue
            # Only look up the id of teasers that will actually be emitted,
            # and skip those that cannot be addressed.
            a_id = xpath_text(asset, './details/assetId', 'asset id')
            if not a_id:
                continue
            yield {
                '_type': 'url',
                'playlist_title': title,
                'playlist_description': description,
                'url': 'zdf:%s:%s' % (a_type, a_id),
            }

    def _real_extract(self, url):
        """Return a playlist whose entries are fetched page by page on demand."""
        channel_id = self._match_id(url)
        entries = OnDemandPagedList(
            functools.partial(self._fetch_page, channel_id), self._PAGE_SIZE)

        return {
            '_type': 'playlist',
            'id': channel_id,
            'entries': entries,
        }