]>
Commit | Line | Data |
---|---|---|
47f2d01a L |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import ExtractorError | |
6 | ||
7 | ||
class KikaIE(InfoExtractor):
    """Information extractor for videos and broadcasts on kika.de.

    The video/broadcast page is downloaded first; it embeds a ``dataURL``
    pointing at ``video<ID>-avCustom.xml``.  That XML document carries the
    title, the canonical page URL, an optional description, the duration
    and one ``<asset>`` child per available progressive download.
    """

    _VALID_URL = r'https?://(?:www\.)?kika\.de/(?:[a-z-]+/)*(?:video|(?:einzel)?sendung)(?P<id>\d+).*'

    _TESTS = [
        {
            'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
            'md5': '4930515e36b06c111213e80d1e4aad0e',
            'info_dict': {
                'id': '19636',
                'ext': 'mp4',
                'title': 'Baumhaus vom 30. Oktober 2015',
                'description': None
            }
        },
        {
            'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
            'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
            'info_dict': {
                'id': '8182',
                'ext': 'mp4',
                'title': 'Beutolomäus und der geheime Weihnachtswunsch',
                'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd'
            }
        },
        {
            'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
            'md5': '4930515e36b06c111213e80d1e4aad0e',
            'info_dict': {
                'id': '19636',
                'ext': 'mp4',
                'title': 'Baumhaus vom 30. Oktober 2015',
                'description': None
            }
        },
        {
            'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
            'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
            'info_dict': {
                'id': '8182',
                'ext': 'mp4',
                'title': 'Beutolomäus und der geheime Weihnachtswunsch',
                'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd'
            }
        }
    ]

    @staticmethod
    def _child_text(parent, tag):
        """Return the text of child element *tag* of *parent*, or None.

        None is returned when *parent* itself is None or has no such child.
        """
        if parent is None:
            return None
        node = parent.find(tag)
        # Compare against None explicitly: an Element without children is
        # falsy, so a plain truth test would misreport leaf elements.
        return node.text if node is not None else None

    @staticmethod
    def _child_int(parent, tag):
        """Return child element *tag* of *parent* parsed as int, or None.

        None is returned when the element is absent, empty or not numeric.
        """
        node = parent.find(tag) if parent is not None else None
        if node is None or node.text is None:
            return None
        try:
            return int(node.text)
        except ValueError:
            return None

    @staticmethod
    def _mmss_to_seconds(text):
        """Convert a 'mm:ss' string to seconds; None if missing or malformed.

        The minutes part is not capped at 59: durations of an hour or more
        are still written as mm:ss (e.g. 78:42).
        """
        if not text:
            return None
        parts = text.split(':')
        try:
            return int(parts[0]) * 60 + int(parts[1])
        except (IndexError, ValueError):
            return None

    def _real_extract(self, url):
        """Extract the info dict for the kika.de page at *url*.

        Raises ExtractorError (expected=True) when the page does not
        reference an avCustom XML document, i.e. the video is not
        available online.
        """
        # broadcast_id may be the same as the video_id
        broadcast_id = self._match_id(url)
        webpage = self._download_webpage(url, broadcast_id)

        xml_re = r'sectionArticle[ "](?:(?!sectionA[ "])(?:.|\n))*?dataURL:\'(?:/[a-z-]+?)*?/video(\d+)-avCustom\.xml'
        video_id = self._search_regex(xml_re, webpage, "xml_url", default=None)
        if not video_id:
            # Video is not available online
            raise ExtractorError(
                'Video %s is not available online' % broadcast_id,
                expected=True)

        xml_url = 'http://www.kika.de/video%s-avCustom.xml' % (video_id)
        xml_tree = self._download_xml(xml_url, video_id)

        title = xml_tree.find('title').text
        webpage_url = xml_tree.find('htmlUrl').text

        # The description is not available for all videos
        description = self._child_text(
            xml_tree.find('broadcast'), 'broadcastDescription')

        # Duration is optional metadata; do not fail the extraction on it
        duration = self._mmss_to_seconds(
            self._child_text(xml_tree, 'duration'))

        formats_list = []
        assets = xml_tree.find('assets')
        for asset in (assets if assets is not None else []):
            media_url = self._child_text(asset, 'progressiveDownloadUrl')
            if not media_url:
                # Without a download URL the asset cannot be used
                continue

            media_type = self._child_text(asset, 'mediaType')
            width = self._child_int(asset, 'frameWidth')
            height = self._child_int(asset, 'frameHeight')
            abr = self._child_int(asset, 'bitrateAudio')
            vbr = self._child_int(asset, 'bitrateVideo')
            has_size = width is not None and height is not None
            has_rates = abr is not None and vbr is not None

            formats_list.append({
                'url': media_url,
                'ext': media_type.lower() if media_type else None,
                'format': self._child_text(asset, 'profileName'),
                'width': width,
                'height': height,
                'resolution': '%dx%d' % (width, height) if has_size else None,
                'abr': abr,
                'vbr': vbr,
                'tbr': abr + vbr if has_rates else None,
                'filesize': self._child_int(asset, 'fileSize'),
            })

        # Sort by resolution (=quality); unknown dimensions sort first
        formats_list.sort(
            key=lambda fmt: (fmt['width'] or 0) * (fmt['height'] or 0))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats_list,
            'duration': duration,
            'webpage_url': webpage_url
        }