]>
Commit | Line | Data |
---|---|---|
cd9ff4ec RA |
1 | import re |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..utils import ( | |
5 | clean_html, | |
6 | determine_ext, | |
7 | int_or_none, | |
8 | js_to_json, | |
9 | mimetype2ext, | |
10 | parse_filesize, | |
11 | ) | |
12 | ||
13 | ||
class MassengeschmackTVIE(InfoExtractor):
    """Extractor for massengeschmack.tv episode pages (``/play/<id>``).

    Formats are collected from two places in the downloaded page: the
    ``MEDIA`` JavaScript array and the HTML anchors that carry a label,
    an optional ``<width>x<height>`` and a file size.
    """

    IE_NAME = 'massengeschmack.tv'
    _VALID_URL = r'https?://(?:www\.)?massengeschmack\.tv/play/(?P<id>[^?&#]+)'

    _TEST = {
        'url': 'https://massengeschmack.tv/play/fktv202',
        'md5': '9996f314994a49fefe5f39aa1b07ae21',
        'info_dict': {
            'id': 'fktv202',
            'ext': 'mp4',
            'title': 'Fernsehkritik-TV #202',
            'thumbnail': 'https://cache.massengeschmack.tv/img/mag/fktv202.jpg'
        },
    }

    def _real_extract(self, url):
        """Download the episode page and return its info dict.

        The returned dict has the keys ``id``, ``title``, ``formats`` and
        ``thumbnail``.  Title and thumbnail are looked up non-fatally; the
        ``MEDIA`` array lookup is fatal.
        """
        episode = self._match_id(url)

        webpage = self._download_webpage(url, episode)
        sources = self._parse_json(
            self._search_regex(
                r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'),
            episode, js_to_json)

        formats = []
        for source in sources:
            furl = source.get('src')
            if not furl:
                continue
            furl = self._proto_relative_url(furl)
            # determine_ext() returns the truthy placeholder 'unknown_video'
            # by default, which would make the MIME-type fallback
            # unreachable; ask for None so that `or` can fall through.
            ext = determine_ext(furl, None) or mimetype2ext(source.get('type'))
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    furl, episode, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            else:
                formats.append({
                    'url': furl,
                    # Reuse the resolved extension so a MIME-derived value
                    # is not replaced by the 'unknown_video' placeholder.
                    'format_id': ext,
                })

        for (durl, format_id, width, height, filesize) in re.findall(r'''(?x)
                <a[^>]+?href="(?P<url>(?:https:)?//[^"]+)".*?
                <strong>(?P<format_id>.+?)</strong>.*?
                <small>(?:(?P<width>\d+)x(?P<height>\d+))?\s+?\((?P<filesize>[\d,]+\s*[GM]iB)\)</small>
                ''', webpage):
            formats.append({
                # The pattern also accepts scheme-less '//host/...' links;
                # normalise them the same way as the MEDIA sources above.
                'url': self._proto_relative_url(durl),
                'format_id': format_id,
                'width': int_or_none(width),
                'height': int_or_none(height),
                'filesize': parse_filesize(filesize),
                # Entries whose label starts with 'Audio' have no video.
                'vcodec': 'none' if format_id.startswith('Audio') else None,
            })

        return {
            'id': episode,
            'title': clean_html(self._html_search_regex(
                r'<span[^>]+\bid=["\']clip-title["\'][^>]*>([^<]+)',
                webpage, 'title', fatal=False)),
            'formats': formats,
            'thumbnail': self._search_regex(
                r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False),
        }