]>
Commit | Line | Data |
---|---|---|
6a5af6ac M |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..utils import compat_urlparse | |
8 | ||
9 | ||
class GolemIE(InfoExtractor):
    """Extractor for videos served from video.golem.de.

    The video id is taken from the page URL, the per-video XML config is
    downloaded from ``_CONFIG`` and its elements are turned into format and
    thumbnail entries of the returned info dict.
    """

    _VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/'
    _TEST = {
        'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html',
        'md5': 'c1a2c0a3c863319651c7c992c5ee29bf',
        'info_dict': {
            'id': '14095',
            'format_id': 'high',
            'ext': 'mp4',
            'title': 'iPhone 6 und 6 Plus - Test',
            'duration': 300,
            'filesize': 65309548,
        }
    }

    # URL template of the XML config; ``{0}`` is replaced by the video id.
    _CONFIG = 'https://video.golem.de/xml/{0}.xml'
    # Base URL that the (possibly relative) URLs found in the config are
    # joined onto.
    _PREFIX = 'http://video.golem.de'

    def _warn(self, fmt, *args):
        """Report a warning built from ``fmt.format(*args)``.

        The current video id is passed along as the second argument of
        ``report_warning``.  ``self._id`` is only assigned in
        ``_real_extract``, so fall back to ``None`` when a helper is used
        before that happened instead of failing with ``AttributeError``.
        """
        self.report_warning(fmt.format(*args), getattr(self, '_id', None))

    def _set_int(self, target, key, raw, fmt, *args):
        """Store ``int(raw)`` under ``target[key]``, warning on bad input.

        Nothing happens when ``raw`` is ``None`` (value absent).  When the
        conversion fails, ``target`` is left untouched and a warning is
        reported through ``_warn`` with the exception appended as the last
        format argument of ``fmt``.
        """
        if raw is None:
            return
        try:
            target[key] = int(raw)
        except ValueError as e:
            self._warn(fmt, *(args + (e,)))

    def _extract_format(self, elem):
        """Build a format dict from one config element.

        ``elem`` is an XML element; its tag is used as the format id.
        Returns ``None`` (after a warning) when the element carries no
        usable URL, otherwise a dict with ``format_id`` and ``url`` plus
        ``ext``, ``filesize``, ``width`` and ``height`` when they could be
        determined.
        """
        format_id = elem.tag

        # A missing <url> child yields None and a whitespace-only one is as
        # useless as an empty one; joining either onto the prefix would
        # produce a bogus entry, so treat all of them as "empty".
        url = (elem.findtext('./url') or '').strip()
        if not url:
            self._warn("{0}: url: empty, skipping", format_id)
            return None

        fmt = {
            'format_id': format_id,
            'url': compat_urlparse.urljoin(self._PREFIX, url),
        }

        # The extension is whatever follows the last dot of <filename>.
        _, dot, ext = elem.findtext('./filename', '').rpartition('.')
        if dot:
            fmt['ext'] = ext
        else:
            self._warn('{0}: ext: missing extension', format_id)

        self._set_int(
            fmt, 'filesize', elem.findtext('./filesize'),
            '{0}: filesize: {1}', format_id)
        self._set_int(
            fmt, 'width', elem.get('width'),
            '{0}: width: {1}', format_id)
        self._set_int(
            fmt, 'height', elem.get('height'),
            '{0}: height: {1}', format_id)

        return fmt

    def _extract_thumbnail(self, elem):
        """Build a thumbnail dict from one config element.

        Returns ``None`` when the element carries no usable URL, otherwise
        a dict with ``url`` plus ``width`` and ``height`` when the
        corresponding attributes hold valid integers.
        """
        # Same reasoning as in _extract_format: missing, empty and
        # whitespace-only URLs are all unusable.
        url = (elem.findtext('./url') or '').strip()
        if not url:
            return None

        thumb = {
            'url': compat_urlparse.urljoin(self._PREFIX, url),
        }

        self._set_int(
            thumb, 'width', elem.get('width'), 'thumbnail: width: {0}')
        self._set_int(
            thumb, 'height', elem.get('height'), 'thumbnail: height: {0}')

        return thumb

    def _real_extract(self, url):
        """Download the XML config for ``url`` and return the info dict.

        The result holds ``id``, ``title``, ``formats`` and ``thumbnails``
        and, when the config provides a parsable ``playtime``, an integer
        ``duration``.
        """
        mobj = re.match(self._VALID_URL, url)
        # Kept on the instance because _warn reads it.
        self._id = mobj.group('id')

        config = self._download_xml(self._CONFIG.format(self._id), self._id)

        info = {
            'id': self._id,
            'title': config.findtext('./title', 'golem'),
        }

        # Every direct child of the root that has a <url> child is treated
        # as a format candidate; unusable ones come back as None.
        candidates = (
            self._extract_format(e) for e in config.findall('./*[url]'))
        formats = [fmt for fmt in candidates if fmt is not None]
        self._sort_formats(formats)
        info['formats'] = formats

        candidates = (
            self._extract_thumbnail(e)
            for e in config.findall('.//teaser[url]'))
        info['thumbnails'] = [
            thumb for thumb in candidates if thumb is not None]

        playtime = config.findtext('./playtime')
        if playtime is not None:
            try:
                # round() returns a float on Python 2, so convert explicitly
                # to always store an integer number of seconds.
                info['duration'] = int(round(float(playtime)))
            except (ValueError, OverflowError) as e:
                # Non-numeric text raises ValueError; NaN/infinity raise
                # ValueError/OverflowError during the integer conversion.
                self._warn('duration: {0}', e)

        return info