]>
Commit | Line | Data |
---|---|---|
1 | # coding: utf-8 | |
2 | from __future__ import unicode_literals | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..compat import compat_urlparse | |
6 | from ..utils import ( | |
7 | fix_xml_ampersands, | |
8 | float_or_none, | |
9 | xpath_with_ns, | |
10 | xpath_text, | |
11 | ) | |
12 | ||
13 | ||
class KarriereVideosIE(InfoExtractor):
    """Extractor for video pages on karrierevideos.at.

    The page embeds a player config whose id is used to fetch a playlist
    XML document; the stream location (streamer + file) is read from the
    first ``tracklist/item`` entry of that playlist.
    """

    _VALID_URL = r'https?://(?:www\.)?karrierevideos\.at(?:/[^/]+)+/(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'http://www.karrierevideos.at/berufsvideos/mittlere-hoehere-schulen/altenpflegerin',
        'info_dict': {
            'id': '32c91',
            'ext': 'flv',
            'title': 'AltenpflegerIn',
            'description': 'md5:dbadd1259fde2159a9b28667cb664ae2',
            'thumbnail': r're:^http://.*\.png',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }, {
        # broken ampersands
        'url': 'http://www.karrierevideos.at/orientierung/vaeterkarenz-und-neue-chancen-fuer-muetter-baby-was-nun',
        'info_dict': {
            'id': '5sniu',
            'ext': 'flv',
            'title': 'Väterkarenz und neue Chancen für Mütter - "Baby - was nun?"',
            'description': 'md5:97092c6ad1fd7d38e9d6a5fdeb2bcc33',
            'thumbnail': r're:^http://.*\.png',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }]

    def _real_extract(self, url):
        """Extract stream and metadata information for a single video page.

        :param url: page URL matching ``_VALID_URL``.
        :returns: info dict with ``id``, ``url``, ``play_path``, ``ext``,
            ``title``, ``description``, ``thumbnail``, ``uploader`` and
            ``duration`` keys.
        :raises ExtractorError: when the title, the video id, the playlist
            item, the video file or the streamer cannot be found.
        """
        # Imported here so this block does not depend on the module-level
        # import list being extended.
        from ..utils import ExtractorError

        # The URL slug is only used to label the page download; the real
        # video id is taken from the page's player config below.
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        title = (self._html_search_meta('title', webpage, default=None)
                 or self._search_regex(
                     r'<h1 class="title">([^<]+)</h1>', webpage, 'video title'))

        video_id = self._search_regex(
            r'/config/video/(.+?)\.xml', webpage, 'video id')
        # Server returns malformed headers
        # Force Accept-Encoding: * to prevent gzipped results
        playlist = self._download_xml(
            'http://www.karrierevideos.at/player-playlist.xml.php?p=%s' % video_id,
            video_id, transform_source=fix_xml_ampersands,
            headers={'Accept-Encoding': '*'})

        NS_MAP = {
            'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'
        }

        def ns(path):
            return xpath_with_ns(path, NS_MAP)

        item = playlist.find('./tracklist/item')
        if item is None:
            # Element.find() returns None when nothing matches; fail with a
            # clear extractor error instead of an AttributeError further down.
            raise ExtractorError(
                'Unable to find playlist item', video_id=video_id)

        # Mandatory: without these there is nothing to download.
        video_file = xpath_text(
            item, ns('./jwplayer:file'), 'video url', fatal=True)
        streamer = xpath_text(
            item, ns('./jwplayer:streamer'), 'streamer', fatal=True)

        # Optional metadata: missing values simply end up as None.
        uploader = xpath_text(
            item, ns('./jwplayer:author'), 'uploader')
        duration = float_or_none(
            xpath_text(item, ns('./jwplayer:duration'), 'duration'))

        # The description is optional as well, so a page without the
        # lead text block must not abort the whole extraction.
        description = self._html_search_regex(
            r'(?s)<div class="leadtext">(.+?)</div>',
            webpage, 'description', fatal=False)

        thumbnail = self._html_search_meta(
            'thumbnail', webpage, 'thumbnail')
        if thumbnail:
            # Resolve against the page URL in case the meta value is relative.
            thumbnail = compat_urlparse.urljoin(url, thumbnail)

        return {
            'id': video_id,
            # The streamer URL is rewritten from rtmpt to plain rtmp.
            'url': streamer.replace('rtmpt', 'rtmp'),
            'play_path': 'mp4:%s' % video_file,
            'ext': 'flv',
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'duration': duration,
        }