]>
Commit | Line | Data |
---|---|---|
cd9ff4ec RA |
1 | import re |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..utils import ( | |
5 | clean_html, | |
6 | determine_ext, | |
7 | int_or_none, | |
8 | js_to_json, | |
9 | mimetype2ext, | |
10 | parse_filesize, | |
11 | ) | |
12 | ||
13 | ||
class MassengeschmackTVIE(InfoExtractor):
    """Extractor for episode pages under ``massengeschmack.tv/play/``."""

    IE_NAME = 'massengeschmack.tv'
    _VALID_URL = r'https?://(?:www\.)?massengeschmack\.tv/play/(?P<id>[^?&#]+)'

    _TEST = {
        'url': 'https://massengeschmack.tv/play/fktv202',
        'md5': 'a9e054db9c2b5a08f0a0527cc201e8d3',
        'info_dict': {
            'id': 'fktv202',
            'ext': 'mp4',
            'title': 'Fernsehkritik-TV - Folge 202',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for a single episode page.

        Formats come from two places in the downloaded page: the JavaScript
        ``MEDIA`` array (player sources) and the HTML download links that
        carry a label, an optional resolution and a file size.

        Returns a dict with ``id``, ``title``, ``formats`` and ``thumbnail``.
        """
        episode = self._match_id(url)

        webpage = self._download_webpage(url, episode)
        title = clean_html(self._html_search_regex(
            r'<h3>([^<]+)</h3>', webpage, 'title'))
        thumbnail = self._search_regex(
            r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)
        sources = self._parse_json(
            self._search_regex(
                r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'),
            episode, js_to_json)

        formats = []
        for source in sources:
            furl = source.get('src')
            if not furl:
                continue
            furl = self._proto_relative_url(furl)
            # Pass None as the default so an unrecognised URL extension
            # yields a falsy value and the MIME-type fallback can actually
            # run; the helper's stock default is a truthy placeholder.
            ext = determine_ext(furl, None) or mimetype2ext(source.get('type'))
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    furl, episode, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            else:
                formats.append({
                    'url': furl,
                    'format_id': ext,
                })

        # Download links: href, <strong>label</strong>, then
        # <small>[WxH] (size)</small>. Whitespace inside the verbose
        # pattern is insignificant.
        download_links = re.findall(r'''(?x)
            <a[^>]+?href="(?P<url>(?:https:)?//[^"]+)".*?
            <strong>(?P<format_id>.+?)</strong>.*?
            <small>(?:(?P<width>\d+)x(?P<height>\d+))?\s+?\((?P<filesize>[\d,]+\s*[GM]iB)\)</small>
            ''', webpage)
        for durl, format_id, width, height, filesize in download_links:
            formats.append({
                # The pattern accepts scheme-less ``//host/...`` links, so
                # normalise them the same way as the player sources above.
                'url': self._proto_relative_url(durl),
                'format_id': format_id,
                'width': int_or_none(width),
                'height': int_or_none(height),
                'filesize': parse_filesize(filesize),
                # Labels starting with "Audio" are marked as having no video.
                'vcodec': 'none' if format_id.startswith('Audio') else None,
            })

        return {
            'id': episode,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
        }