]>
Commit | Line | Data |
---|---|---|
d78c834e | 1 | # coding: utf-8 |
725652e9 MH |
2 | from __future__ import unicode_literals |
3 | ||
4 | from .common import InfoExtractor | |
d78c834e S |
5 | from ..compat import compat_urlparse |
6 | from ..utils import ( | |
7 | fix_xml_ampersands, | |
8 | float_or_none, | |
9 | xpath_with_ns, | |
10 | xpath_text, | |
11 | ) | |
725652e9 MH |
12 | |
13 | ||
class KarriereVideosIE(InfoExtractor):
    """Extractor for video pages on karrierevideos.at.

    The title, description and thumbnail are scraped from the HTML page;
    the stream location, uploader and duration are read from the player
    playlist XML referenced by that page.
    """

    _VALID_URL = r'https?://(?:www\.)?karrierevideos\.at(?:/[^/]+)+/(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'http://www.karrierevideos.at/berufsvideos/mittlere-hoehere-schulen/altenpflegerin',
        'info_dict': {
            'id': '32c91',
            'ext': 'flv',
            'title': 'AltenpflegerIn',
            'description': 'md5:dbadd1259fde2159a9b28667cb664ae2',
            'thumbnail': r're:^http://.*\.png',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }, {
        # broken ampersands
        'url': 'http://www.karrierevideos.at/orientierung/vaeterkarenz-und-neue-chancen-fuer-muetter-baby-was-nun',
        'info_dict': {
            'id': '5sniu',
            'ext': 'flv',
            'title': 'Väterkarenz und neue Chancen für Mütter - "Baby - was nun?"',
            'description': 'md5:97092c6ad1fd7d38e9d6a5fdeb2bcc33',
            'thumbnail': r're:^http://.*\.png',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for the video page at ``url``.

        Returns a dict with the keys ``id``, ``url``, ``play_path``,
        ``ext``, ``title``, ``description``, ``thumbnail``, ``uploader``
        and ``duration``.  The title, the playlist id, the video file and
        the streamer are required; the remaining fields may be ``None``.
        """
        # Until the real id is found in the page, the last URL path
        # component is used to identify the download.
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        title = (self._html_search_meta('title', webpage, default=None)
                 or self._search_regex(r'<h1 class="title">([^<]+)</h1>', webpage, 'video title'))

        # From here on the id is the one embedded in the player config URL.
        video_id = self._search_regex(
            r'/config/video/(.+?)\.xml', webpage, 'video id')
        # Server returns malformed headers
        # Force Accept-Encoding: * to prevent gzipped results
        playlist = self._download_xml(
            'http://www.karrierevideos.at/player-playlist.xml.php?p=%s' % video_id,
            video_id, transform_source=fix_xml_ampersands,
            headers={'Accept-Encoding': '*'})

        NS_MAP = {
            'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'
        }

        def ns(path):
            # Expand the ``jwplayer:`` prefix in ``path`` using NS_MAP.
            return xpath_with_ns(path, NS_MAP)

        item = playlist.find('./tracklist/item')
        video_file = xpath_text(
            item, ns('./jwplayer:file'), 'video url', fatal=True)
        streamer = xpath_text(
            item, ns('./jwplayer:streamer'), 'streamer', fatal=True)

        uploader = xpath_text(
            item, ns('./jwplayer:author'), 'uploader')
        duration = float_or_none(
            xpath_text(item, ns('./jwplayer:duration'), 'duration'))

        # The description is optional metadata: a page without the lead
        # text block must not abort an otherwise successful extraction.
        description = self._html_search_regex(
            r'(?s)<div class="leadtext">(.+?)</div>',
            webpage, 'description', fatal=False)

        thumbnail = self._html_search_meta(
            'thumbnail', webpage, 'thumbnail')
        if thumbnail:
            # Resolve the thumbnail reference against the page URL.
            thumbnail = compat_urlparse.urljoin(url, thumbnail)

        return {
            'id': video_id,
            # The streamer value has 'rtmpt' rewritten to 'rtmp'.
            'url': streamer.replace('rtmpt', 'rtmp'),
            'play_path': 'mp4:%s' % video_file,
            'ext': 'flv',
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'duration': duration,
        }