]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/kika.py
[kika] Replace non working tests and recognize 'einzelsendung' urls.
[yt-dlp.git] / youtube_dl / extractor / kika.py
CommitLineData
47f2d01a
L
1# coding: utf-8
2from __future__ import unicode_literals
3
4from .common import InfoExtractor
5from ..utils import ExtractorError
6
7
class KikaIE(InfoExtractor):
    """Information extractor for videos hosted on kika.de.

    The page URL carries a "broadcast" id.  The actual video id is read from
    the ``dataURL`` reference embedded in the page and is then used to fetch
    the ``video<ID>-avCustom.xml`` metadata document that lists the title,
    duration and the downloadable assets.
    """

    _VALID_URL = r'https?://(?:www\.)?kika\.de/(?:[a-z-]+/)*(?:video|(?:einzel)?sendung)(?P<id>\d+).*'

    _TESTS = [
        {
            'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
            'md5': '4930515e36b06c111213e80d1e4aad0e',
            'info_dict': {
                'id': '19636',
                'ext': 'mp4',
                'title': 'Baumhaus vom 30. Oktober 2015',
                'description': None,
            },
        },
        {
            'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
            'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
            'info_dict': {
                'id': '8182',
                'ext': 'mp4',
                'title': 'Beutolomäus und der geheime Weihnachtswunsch',
                'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
            },
        },
        {
            'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
            'md5': '4930515e36b06c111213e80d1e4aad0e',
            'info_dict': {
                'id': '19636',
                'ext': 'mp4',
                'title': 'Baumhaus vom 30. Oktober 2015',
                'description': None,
            },
        },
        {
            'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
            'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
            'info_dict': {
                'id': '8182',
                'ext': 'mp4',
                'title': 'Beutolomäus und der geheime Weihnachtswunsch',
                'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
            },
        },
    ]

    @staticmethod
    def _kika_text(node, tag):
        """Return the text of child ``tag`` of ``node``, or None if absent.

        ``node`` itself may be None, which makes chained optional lookups
        possible without a try/except around them.
        """
        # Compare against None explicitly: an XML element without children
        # is falsy, so a plain truth test would treat it as missing.
        if node is None:
            return None
        child = node.find(tag)
        return child.text if child is not None else None

    @staticmethod
    def _kika_int(node, tag):
        """Return child ``tag`` of ``node`` parsed as int, or None."""
        child = node.find(tag) if node is not None else None
        if child is None:
            return None
        try:
            return int(child.text)
        except (TypeError, ValueError):
            # Empty element (text is None) or non-numeric content
            return None

    @staticmethod
    def _kika_duration(text):
        """Convert a colon separated duration string to seconds.

        The site uses ``mm:ss`` even for durations of one hour or more
        (e.g. ``78:42``).  Every colon separated field is folded in base 60,
        so ``ss`` and ``hh:mm:ss`` are handled as well.  Returns None when
        the value is missing or not numeric.
        """
        if not text:
            return None
        seconds = 0
        try:
            for field in text.strip().split(':'):
                seconds = seconds * 60 + int(field)
        except ValueError:
            return None
        return seconds

    def _real_extract(self, url):
        """Extract the info dict for the video behind ``url``.

        Raises ExtractorError (expected) when the page does not reference an
        online video.  The title is mandatory; description, duration and the
        numeric per-format fields are optional and are left out or set to
        None when the metadata document does not provide them.
        """
        # broadcast_id may be the same as the video_id
        broadcast_id = self._match_id(url)
        webpage = self._download_webpage(url, broadcast_id)

        xml_re = r'sectionArticle[ "](?:(?!sectionA[ "])(?:.|\n))*?dataURL:\'(?:/[a-z-]+?)*?/video(\d+)-avCustom\.xml'
        video_id = self._search_regex(xml_re, webpage, "xml_url", default=None)
        if not video_id:
            # Video is not available online
            err_msg = 'Video %s is not available online' % broadcast_id
            raise ExtractorError(err_msg, expected=True)

        xml_url = 'http://www.kika.de/video%s-avCustom.xml' % (video_id)
        xml_tree = self._download_xml(xml_url, video_id)

        # Mandatory: a missing <title> still fails loudly, as before
        title = xml_tree.find('title').text
        # Fall back to the requested URL if the document has no <htmlUrl>
        webpage_url = self._kika_text(xml_tree, 'htmlUrl') or url

        # The description is not available for all videos
        description = self._kika_text(
            xml_tree.find('broadcast'), 'broadcastDescription')

        duration = self._kika_duration(self._kika_text(xml_tree, 'duration'))

        formats_list = []
        assets = xml_tree.find('assets')
        for asset in (assets if assets is not None else ()):
            media_url = self._kika_text(asset, 'progressiveDownloadUrl')
            if not media_url:
                # An asset without a download URL cannot be used
                continue

            media_type = self._kika_text(asset, 'mediaType')
            width = self._kika_int(asset, 'frameWidth')
            height = self._kika_int(asset, 'frameHeight')
            abr = self._kika_int(asset, 'bitrateAudio')
            vbr = self._kika_int(asset, 'bitrateVideo')
            both_dimensions = width is not None and height is not None
            both_bitrates = abr is not None and vbr is not None

            format_dict = {
                'url': media_url,
                'ext': media_type.lower() if media_type else None,
                'format': self._kika_text(asset, 'profileName'),
                'width': width,
                'height': height,
                'resolution': (
                    '%dx%d' % (width, height) if both_dimensions else None),
                'abr': abr,
                'vbr': vbr,
                'tbr': abr + vbr if both_bitrates else None,
                'filesize': self._kika_int(asset, 'fileSize'),
            }
            # Keep only the fields that are actually known
            formats_list.append(dict(
                (key, value) for key, value in format_dict.items()
                if value is not None))

        # Sort by resolution (=quality); unknown dimensions sort first
        formats_list.sort(
            key=lambda fmt: fmt.get('width', 0) * fmt.get('height', 0))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats_list,
            'duration': duration,
            'webpage_url': webpage_url,
        }