# coding: utf-8
# Origin: youtube_dl/extractor/kika.py (blob view of yt-dlp.git on "jfr.im git")
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import ExtractorError
class KikaIE(InfoExtractor):
    """Extractor for kika.de video, broadcast and single-broadcast pages.

    The page URL carries a numeric id (``video<N>``, ``sendung<N>`` or
    ``einzelsendung<N>``).  The page itself references a
    ``video<N>-avCustom.xml`` document, which holds the title, the optional
    description, the duration and one ``assets`` child per available format.
    """

    _VALID_URL = r'https?://(?:www\.)?kika\.de/(?:[a-z-]+/)*(?:video|(?:einzel)?sendung)(?P<id>\d+).*'

    # Locates the id inside the ``dataURL:'.../video<N>-avCustom.xml'``
    # reference that follows a ``sectionArticle`` marker in the page; the
    # negative lookahead keeps the match from running past the next
    # ``sectionA`` marker.
    _XML_ID_RE = r'sectionArticle[ "](?:(?!sectionA[ "])(?:.|\n))*?dataURL:\'(?:/[a-z-]+?)*?/video(\d+)-avCustom\.xml'

    # NOTE(review): this list was rebuilt from a lossy copy of the file that
    # dropped several lines of every test case.  Only the values that were
    # still visible are reproduced; the ``info_dict`` wrapper is assumed, and
    # the expected ``id``/``ext`` entries are presumably missing -- restore
    # them from the upstream file before relying on these tests.
    _TESTS = [
        {
            'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
            'md5': '4930515e36b06c111213e80d1e4aad0e',
            'info_dict': {
                'title': 'Baumhaus vom 30. Oktober 2015',
            },
        },
        {
            'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
            'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
            'info_dict': {
                'title': 'Beutolomäus und der geheime Weihnachtswunsch',
                'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
            },
        },
        {
            'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
            'md5': '4930515e36b06c111213e80d1e4aad0e',
            'info_dict': {
                'title': 'Baumhaus vom 30. Oktober 2015',
            },
        },
        {
            'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
            'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
            'info_dict': {
                'title': 'Beutolomäus und der geheime Weihnachtswunsch',
                'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
            },
        },
    ]

    @staticmethod
    def _child_text(node, tag):
        """Return the text of ``node``'s child ``tag``, or None if absent."""
        if node is None:
            return None
        child = node.find(tag)
        if child is None:
            return None
        return child.text

    @classmethod
    def _child_int(cls, node, tag):
        """Return the child's text as an int, or None if missing/non-numeric."""
        try:
            return int(cls._child_text(node, tag))
        except (TypeError, ValueError):
            return None

    @staticmethod
    def _duration_seconds(text):
        """Convert a colon-separated duration string to seconds.

        The feed uses ``mm:ss`` even for an hour or more (e.g. ``78:42``).
        Components are folded base 60, so ``mm:ss`` yields
        ``mm * 60 + ss``; a bare ``ss`` or an ``hh:mm:ss`` value is accepted
        as well.  Returns None when the value is empty or not numeric.
        """
        if not text:
            return None
        seconds = 0
        try:
            for part in text.strip().split(':'):
                seconds = seconds * 60 + int(part)
        except ValueError:
            return None
        return seconds

    @classmethod
    def _asset_to_format(cls, asset):
        """Build one format dict from an ``assets`` child element.

        Returns None when the asset has no ``progressiveDownloadUrl``.
        Fields whose source element is missing or unparsable are left out
        of the dict instead of raising.
        """
        media_url = cls._child_text(asset, 'progressiveDownloadUrl')
        if not media_url:
            return None

        media_type = cls._child_text(asset, 'mediaType')
        width = cls._child_int(asset, 'frameWidth')
        height = cls._child_int(asset, 'frameHeight')
        abr = cls._child_int(asset, 'bitrateAudio')
        vbr = cls._child_int(asset, 'bitrateVideo')

        fields = [
            ('url', media_url),
            ('ext', media_type.lower() if media_type else None),
            ('format', cls._child_text(asset, 'profileName')),
            ('width', width),
            ('height', height),
            ('abr', abr),
            ('vbr', vbr),
            ('filesize', cls._child_int(asset, 'fileSize')),
        ]
        if width is not None and height is not None:
            fields.append(('resolution', '%dx%d' % (width, height)))
        if abr is not None and vbr is not None:
            # Total bitrate is the sum of the audio and video bitrates.
            fields.append(('tbr', abr + vbr))
        return dict((key, value) for key, value in fields if value is not None)

    def _real_extract(self, url):
        """Extract title, description, duration and formats for ``url``.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``formats``, ``duration`` and ``webpage_url``.

        Raises ExtractorError (``expected=True``) when the page contains no
        reference to a video XML document, and ExtractorError when that
        document has no title.
        """
        # broadcast_id may be the same as the video_id
        broadcast_id = self._match_id(url)
        webpage = self._download_webpage(url, broadcast_id)

        video_id = self._search_regex(
            self._XML_ID_RE, webpage, "xml_url", default=None)
        if not video_id:
            # Video is not available online
            err_msg = 'Video %s is not available online' % broadcast_id
            raise ExtractorError(err_msg, expected=True)

        xml_url = 'http://www.kika.de/video%s-avCustom.xml' % (video_id)
        xml_tree = self._download_xml(xml_url, video_id)

        title = self._child_text(xml_tree, 'title')
        if not title:
            raise ExtractorError('Unable to extract title of video %s' % video_id)

        # Fall back to the requested page when the feed names no page URL.
        webpage_url = self._child_text(xml_tree, 'htmlUrl') or url

        # The description is not available for all videos.
        description = self._child_text(
            xml_tree.find('broadcast'), 'broadcastDescription')

        duration = self._duration_seconds(
            self._child_text(xml_tree, 'duration'))

        formats_list = []
        assets = xml_tree.find('assets')
        # Compare against None explicitly: an element without children is
        # falsy, and a missing element must not be iterated.
        if assets is not None:
            for elem in assets:
                format_dict = self._asset_to_format(elem)
                if format_dict is not None:
                    formats_list.append(format_dict)

        # Sort by resolution (=quality); formats without dimensions go first.
        formats_list.sort(
            key=lambda f: (f.get('width') or 0) * (f.get('height') or 0))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats_list,
            'duration': duration,
            'webpage_url': webpage_url,
        }