]>
Commit | Line | Data |
---|---|---|
1 | from __future__ import unicode_literals | |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..utils import ( | |
5 | clean_html, | |
6 | determine_ext, | |
7 | js_to_json, | |
8 | ) | |
9 | ||
10 | ||
class FKTVIE(InfoExtractor):
    """Extractor for fernsehkritik.tv episode pages (``/folge-<number>``).

    The episode's ``/play`` page is downloaded and three things are read
    from it: the text of an ``<h3>`` element (title), the ``POSTER``
    JavaScript variable (thumbnail URL) and the ``MEDIA`` JavaScript array
    (list of media sources).
    """

    IE_NAME = 'fernsehkritik.tv'
    _VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'

    _TEST = {
        'url': 'http://fernsehkritik.tv/folge-1',
        'md5': '21f0b0c99bce7d5b524eb1b17b1c6d79',
        'info_dict': {
            'id': '1',
            'ext': 'mp4',
            'title': 'Folge 1 vom 10. April 2007',
            # Raw string: '\.' in a normal literal is an invalid escape
            # sequence and triggers a warning on modern Python versions.
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the episode addressed by *url*.

        Returns a dict with the keys ``id``, ``title``, ``formats`` and
        ``thumbnail``.  The thumbnail lookup is non-fatal; the title and
        media lookups use the helpers' default (fatal) behaviour.
        """
        episode = self._match_id(url)

        # The player page, not the episode landing page, carries the
        # MEDIA / POSTER JavaScript variables parsed below.
        webpage = self._download_webpage(
            'http://fernsehkritik.tv/folge-%s/play' % episode, episode)

        title = clean_html(self._html_search_regex(
            r'<h3>([^<]+)</h3>', webpage, 'title'))
        thumbnail = self._search_regex(
            r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)

        # MEDIA is a JavaScript array literal, so it is passed through
        # js_to_json before being parsed as JSON.
        media_js = self._search_regex(
            r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media')
        sources = self._parse_json(media_js, episode, js_to_json)

        # Entries without a 'src' value are skipped.  The format id is
        # whatever determine_ext() yields for the URL (presumably the file
        # extension -- confirm against ..utils).
        source_urls = (source.get('src') for source in sources)
        formats = [
            {
                'url': furl,
                'format_id': determine_ext(furl),
            }
            for furl in source_urls
            if furl
        ]
        self._sort_formats(formats)

        return {
            'id': episode,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
        }