]>
Commit | Line | Data |
---|---|---|
a2e6db36 | 1 | # coding: utf-8 |
919052d0 | 2 | from __future__ import unicode_literals |
a2e6db36 | 3 | |
9abd500a | 4 | import functools |
d5822b96 PH |
5 | import re |
6 | ||
7 | from .common import InfoExtractor | |
8 | from ..utils import ( | |
61224dbc | 9 | int_or_none, |
02dbf93f | 10 | unified_strdate, |
9abd500a | 11 | OnDemandPagedList, |
7b091c37 | 12 | xpath_text, |
d5822b96 PH |
13 | ) |
14 | ||
0b7c2485 | 15 | |
def extract_from_xml_url(ie, video_id, xml_url):
    """Download a video-details XML document and build an info dict from it.

    ie -- extractor instance; its _download_xml() fetches the document and
          its _sort_formats() orders the resulting format list.
    video_id -- identifier passed to the download call and returned as 'id'.
    xml_url -- location of the XML document describing the video.

    Returns a dict with the keys 'id', 'title', 'description', 'duration',
    'thumbnails', 'uploader', 'uploader_id', 'upload_date' and 'formats'.
    """
    doc = ie._download_xml(
        xml_url, video_id,
        note='Downloading video info',
        errnote='Failed to download video info')

    title = doc.find('.//information/title').text
    description = xpath_text(doc, './/information/detail', 'description')
    duration = int_or_none(xpath_text(doc, './/details/lengthSec', 'duration'))
    uploader = xpath_text(doc, './/details/originChannelTitle', 'uploader')
    uploader_id = xpath_text(doc, './/details/originChannelId', 'uploader id')
    upload_date = unified_strdate(xpath_text(doc, './/details/airtime', 'upload date'))

    def xml_to_format(fnode):
        # Convert one <formitaet> node into a format dict.  Returns None
        # when the node cannot describe a usable format (no URL, or no
        # basetype following the six-part underscore-separated scheme),
        # so that a single odd entry does not abort the whole extraction.
        url_node = fnode.find('url')
        video_url = url_node.text if url_node is not None else None
        if not video_url:
            return None
        # URLs pointing at the metafilegenerator host are filtered out
        # later via the '_available' flag.
        is_available = 'http://www.metafilegenerator' not in video_url

        format_id = fnode.attrib.get('basetype')
        if not format_id:
            return None
        format_m = re.match(r'''(?x)
            (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
            (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
        ''', format_id)
        if format_m is None:
            return None

        ext = format_m.group('container')
        proto = format_m.group('proto').lower()

        quality = xpath_text(fnode, './quality', 'quality')
        # The second argument to int_or_none is the divisor applied to
        # the parsed bitrate values.
        abr = int_or_none(xpath_text(fnode, './audioBitrate', 'abr'), 1000)
        vbr = int_or_none(xpath_text(fnode, './videoBitrate', 'vbr'), 1000)

        width = int_or_none(xpath_text(fnode, './width', 'width'))
        height = int_or_none(xpath_text(fnode, './height', 'height'))

        filesize = int_or_none(xpath_text(fnode, './filesize', 'filesize'))

        # Only append the quality suffix when a quality is present;
        # concatenating a missing (None) quality would raise TypeError.
        if quality:
            format_id = format_id + '-' + quality

        return {
            'format_id': format_id,
            'url': video_url,
            'ext': ext,
            'acodec': format_m.group('acodec'),
            'vcodec': format_m.group('vcodec'),
            'abr': abr,
            'vbr': vbr,
            'width': width,
            'height': height,
            'filesize': filesize,
            'format_note': None,
            'protocol': proto,
            '_available': is_available,
        }

    def xml_to_thumbnails(fnode):
        # Build thumbnail dicts from an iterable of image nodes; nodes
        # without text are skipped.
        thumbnails = []
        for node in fnode:
            thumbnail_url = node.text
            if not thumbnail_url:
                continue
            thumbnail = {
                'url': thumbnail_url,
            }
            # A 'key' attribute of the form '<width>x<height>' carries
            # the image dimensions.
            if 'key' in node.attrib:
                m = re.match(r'^([0-9]+)x([0-9]+)$', node.attrib['key'])
                if m:
                    thumbnail['width'] = int(m.group(1))
                    thumbnail['height'] = int(m.group(2))
            thumbnails.append(thumbnail)
        return thumbnails

    thumbnails = xml_to_thumbnails(doc.findall('.//teaserimages/teaserimage'))

    format_nodes = doc.findall('.//formitaeten/formitaet')
    formats = [
        f for f in map(xml_to_format, format_nodes)
        if f is not None and f['_available']]
    ie._sort_formats(formats)

    return {
        'id': video_id,
        'title': title,
        'description': description,
        'duration': duration,
        'thumbnails': thumbnails,
        'uploader': uploader,
        'uploader_id': uploader_id,
        'upload_date': upload_date,
        'formats': formats,
    }
107 | ||
108 | ||
class ZDFIE(InfoExtractor):
    """Extractor for a single ZDFmediathek video.

    _VALID_URL accepts the 'zdf:' and 'zdf:video:' prefixes as well as
    www.zdf.de/ZDFmediathek '.../beitrag/...' page URLs; the numeric video
    id is captured in the 'id' group.
    """

    _VALID_URL = r'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'

    _TEST = {
        'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt',
        'info_dict': {
            'id': '2037704',
            'ext': 'webm',
            'title': 'ZDFspezial - Ende des Machtpokers',
            'description': 'Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial "Ende des Machtpokers - Große Koalition für Deutschland".',
            'duration': 1022,
            'uploader': 'spezial',
            'uploader_id': '225948',
            'upload_date': '20131127',
        },
        'skip': 'Videos on ZDF.de are depublicised in short order',
    }

    def _real_extract(self, url):
        """Return the info dict for the video addressed by url.

        The id matched from url is inserted into the beitragsDetails XML
        service URL, and the result is handed to extract_from_xml_url().
        """
        video_id = self._match_id(url)
        return extract_from_xml_url(
            self, video_id,
            'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id)
8560c618 | 131 | |
8560c618 | 132 | |
class ZDFChannelIE(InfoExtractor):
    """Extractor for ZDFmediathek channel overviews.

    _VALID_URL accepts the 'zdf:topic:' prefix as well as
    www.zdf.de/ZDFmediathek '.../kanaluebersicht/...' page URLs; the numeric
    channel id is captured in the 'id' group.  The result is a playlist
    whose entries are fetched page by page, _PAGE_SIZE teasers at a time.
    """

    _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/)(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic',
        'info_dict': {
            'id': '1586442',
        },
        'playlist_count': 3,
    }
    _PAGE_SIZE = 50

    def _fetch_page(self, channel_id, page):
        """Yield 'url'-type entries for one page of the channel listing.

        channel_id -- id of the channel whose teasers are listed.
        page -- zero-based page index; the request offset is
                page * _PAGE_SIZE.

        Only teasers of type 'video' or 'topic' that carry an asset id are
        yielded; each entry URL has the form 'zdf:<type>:<asset id>'.
        """
        offset = page * self._PAGE_SIZE
        xml_url = (
            'http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset=%d&maxLength=%d&id=%s'
            % (offset, self._PAGE_SIZE, channel_id))
        doc = self._download_xml(
            xml_url, channel_id,
            note='Downloading channel info',
            errnote='Failed to download channel info')

        # Title and description are optional playlist metadata.  They are
        # read with xpath_text, the same way extract_from_xml_url reads
        # optional fields, instead of dereferencing .text on a find()
        # result that may be None.
        # NOTE(review): relies on xpath_text yielding None for an absent
        # element when called without a fatal flag -- confirm in utils.
        title = xpath_text(doc, './/information/title', 'title')
        description = xpath_text(doc, './/information/detail', 'description')
        for asset in doc.findall('.//teasers/teaser'):
            a_type = xpath_text(asset, './type', 'type')
            if a_type not in ('video', 'topic'):
                continue
            # The id is looked up only for teasers that passed the type
            # filter, so ignored teaser kinds need not carry one.
            a_id = xpath_text(asset, './details/assetId', 'asset id')
            if not a_id:
                continue
            yield {
                '_type': 'url',
                'playlist_title': title,
                'playlist_description': description,
                'url': 'zdf:%s:%s' % (a_type, a_id),
            }

    def _real_extract(self, url):
        """Return a playlist dict whose entries are loaded lazily per page."""
        channel_id = self._match_id(url)
        entries = OnDemandPagedList(
            functools.partial(self._fetch_page, channel_id), self._PAGE_SIZE)

        return {
            '_type': 'playlist',
            'id': channel_id,
            'entries': entries,
        }