]>
Commit | Line | Data |
---|---|---|
a2e6db36 | 1 | # coding: utf-8 |
919052d0 | 2 | from __future__ import unicode_literals |
a2e6db36 | 3 | |
9abd500a | 4 | import functools |
d5822b96 PH |
5 | import re |
6 | ||
7 | from .common import InfoExtractor | |
8 | from ..utils import ( | |
61224dbc | 9 | int_or_none, |
02dbf93f | 10 | unified_strdate, |
9abd500a | 11 | OnDemandPagedList, |
d5822b96 PH |
12 | ) |
13 | ||
0b7c2485 | 14 | |
8cc3eba7 PH |
def extract_from_xml_url(ie, video_id, xml_url):
    """Download the video metadata XML at xml_url and build an info dict.

    ie is the extractor whose ``_download_xml`` and ``_sort_formats``
    methods are used, video_id is passed to the download call and becomes
    the resulting ``id``, and xml_url is the document to fetch.

    Returns a dict with the keys id, title, description, duration,
    uploader, uploader_id, upload_date and formats.  Only the title node is
    required; any other metadata node may be absent, in which case the
    corresponding value is None instead of the extraction failing.
    """
    doc = ie._download_xml(
        xml_url, video_id,
        note='Downloading video info',
        errnote='Failed to download video info')

    def node_text(parent, path):
        # Text of the first node matching path, or None if nothing matches.
        node = parent.find(path)
        return None if node is None else node.text

    def per_thousand(parent, path):
        # Integer value of the node divided by 1000, or None if unavailable.
        value = int_or_none(node_text(parent, path))
        return None if value is None else value // 1000

    title = doc.find('.//information/title').text
    description = node_text(doc, './/information/detail')
    duration = int_or_none(node_text(doc, './/details/lengthSec'))
    uploader = node_text(doc, './/details/originChannelTitle')
    uploader_id = node_text(doc, './/details/originChannelId')
    airtime = node_text(doc, './/details/airtime')
    upload_date = None if airtime is None else unified_strdate(airtime)

    def xml_to_format(fnode):
        # Convert one <formitaet> node into a format dict.  Returns None
        # when the node has no URL or its basetype cannot be parsed, so a
        # single odd entry does not abort the whole extraction.
        video_url = node_text(fnode, 'url')
        if not video_url:
            return None

        format_id = fnode.attrib.get('basetype', '')
        format_m = re.match(r'''(?x)
            (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
            (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
        ''', format_id)
        if format_m is None:
            return None

        quality = node_text(fnode, './quality')
        if quality is not None:
            format_id = format_id + '-' + quality

        return {
            'format_id': format_id,
            'url': video_url,
            'ext': format_m.group('container'),
            'acodec': format_m.group('acodec'),
            'vcodec': format_m.group('vcodec'),
            'abr': per_thousand(fnode, './audioBitrate'),
            'vbr': per_thousand(fnode, './videoBitrate'),
            'width': int_or_none(node_text(fnode, './width')),
            'height': int_or_none(node_text(fnode, './height')),
            'filesize': int_or_none(node_text(fnode, './filesize')),
            # No note is derived from the XML; the key is kept so the
            # shape of the format dict stays the same.
            'format_note': None,
            'protocol': format_m.group('proto').lower(),
            # URLs containing this marker are filtered out below.
            '_available': 'http://www.metafilegenerator' not in video_url,
        }

    candidates = (
        xml_to_format(fnode)
        for fnode in doc.findall('.//formitaeten/formitaet'))
    formats = [
        fmt for fmt in candidates
        if fmt is not None and fmt['_available']]
    ie._sort_formats(formats)

    return {
        'id': video_id,
        'title': title,
        'description': description,
        'duration': duration,
        'uploader': uploader,
        'uploader_id': uploader_id,
        'upload_date': upload_date,
        'formats': formats,
    }
89 | ||
90 | ||
class ZDFIE(InfoExtractor):
    """Extractor for a single ZDFmediathek video.

    Matches the ``zdf:<id>`` and ``zdf:video:<id>`` short forms as well as
    www.zdf.de/ZDFmediathek page URLs containing ``beitrag/[video/]<id>``.
    """

    _VALID_URL = r'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'

    _TEST = {
        'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt',
        'info_dict': {
            'id': '2037704',
            'ext': 'webm',
            'title': 'ZDFspezial - Ende des Machtpokers',
            'description': 'Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial "Ende des Machtpokers - Große Koalition für Deutschland".',
            'duration': 1022,
            'uploader': 'spezial',
            'uploader_id': '225948',
            'upload_date': '20131127',
        },
        'skip': 'Videos on ZDF.de are depublicised in short order',
    }

    def _real_extract(self, url):
        """Take the numeric id from url and extract from its details XML."""
        beitrag_id = self._match_id(url)
        return extract_from_xml_url(
            self, beitrag_id,
            'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % beitrag_id)
8560c618 | 113 | |
8560c618 | 114 | |
9abd500a PH |
class ZDFChannelIE(InfoExtractor):
    """Playlist extractor for ZDFmediathek channel overviews.

    Matches ``zdf:topic:<id>`` and www.zdf.de/ZDFmediathek URLs containing
    ``kanaluebersicht/<id>``.  Entries are fetched lazily, one page of
    ``_PAGE_SIZE`` teasers at a time.
    """

    _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/)(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic',
        'info_dict': {
            'id': '1586442',
        },
        'playlist_count': 3,
    }
    # Number of teasers requested per XML service call.
    _PAGE_SIZE = 50

    def _fetch_page(self, channel_id, page):
        """Yield url-type entries for the zero-based page of channel_id.

        Only teasers of type 'video' or 'topic' that carry an asset id are
        yielded; everything else is skipped rather than raising.
        """
        offset = page * self._PAGE_SIZE
        xml_url = (
            'http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset=%d&maxLength=%d&id=%s'
            % (offset, self._PAGE_SIZE, channel_id))
        doc = self._download_xml(
            xml_url, channel_id,
            note='Downloading channel info',
            errnote='Failed to download channel info')

        def node_text(parent, path):
            # Text of the first node matching path, or None if absent.
            node = parent.find(path)
            return None if node is None else node.text

        title = node_text(doc, './/information/title')
        description = node_text(doc, './/information/detail')
        for asset in doc.findall('.//teasers/teaser'):
            # Filter on the type first so that teasers of other types are
            # never required to have an asset id.
            a_type = node_text(asset, './type')
            if a_type not in ('video', 'topic'):
                continue
            a_id = node_text(asset, './details/assetId')
            if not a_id:
                continue
            yield {
                '_type': 'url',
                'playlist_title': title,
                'playlist_description': description,
                'url': 'zdf:%s:%s' % (a_type, a_id),
            }

    def _real_extract(self, url):
        """Return a playlist whose entries are paged in on demand."""
        channel_id = self._match_id(url)
        entries = OnDemandPagedList(
            functools.partial(self._fetch_page, channel_id), self._PAGE_SIZE)

        return {
            '_type': 'playlist',
            'id': channel_id,
            'entries': entries,
        }