]>
Commit | Line | Data |
---|---|---|
22f5f5c6 AJ |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
4de3cb88 | 5 | |
22f5f5c6 | 6 | from .common import InfoExtractor |
4de3cb88 | 7 | from ..utils import merge_dicts |
22f5f5c6 AJ |
8 | |
9 | ||
class MallTVIE(InfoExtractor):
    """Extractor for video pages on mall.tv (bare domain, ``www.`` or ``sk.``)."""

    # The last path component of the URL is captured as the display id;
    # any number of leading path segments is accepted.
    _VALID_URL = r'https?://(?:(?:www|sk)\.)?mall\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [
        {
            'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
            'md5': '1c4a37f080e1f3023103a7b43458e518',
            'info_dict': {
                'id': 't0zzt0',
                'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
                'ext': 'mp4',
                'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
                'description': 'md5:25fc0ec42a72ba602b602c683fa29deb',
                'duration': 216,
                'timestamp': 1538870400,
                'upload_date': '20181007',
                'view_count': int,
            }
        },
        {
            'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
            'only_matching': True,
        },
        {
            'url': 'https://sk.mall.tv/gejmhaus/reklamacia-nehreje-vyrobnik-tepla-alebo-spekacka',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the mall.tv page at *url*.

        The page is downloaded, the video id is taken from the path of a
        ``<source>`` tag's ``src`` attribute, and the HTML5 media entry
        parsed from the page is combined with JSON-LD and Open Graph
        metadata via ``merge_dicts``.
        """
        display_id = self._match_id(url)

        geo_headers = self.geo_verification_headers()
        page = self._download_webpage(url, display_id, headers=geo_headers)

        # Group 1 spans from "<source" up to and including ".../<id>/index";
        # the named group "id" is the path segment right before "/index".
        source_pattern = r'(<source[^>]+\bsrc=(?:(["\'])(?:(?!\2).)+|[^\s]+)/(?P<id>[\da-z]+)/index)\b'
        video_id = self._search_regex(
            source_pattern, page, 'video id', group='id')

        # Append ".m3u8" to every matched ".../index" source before handing
        # the markup to the HTML5 media parser.
        # NOTE(review): presumably this lets the parser recognise the
        # sources as HLS manifests - confirm against
        # _parse_html5_media_entries.
        patched_page = re.sub(source_pattern, r'\1.m3u8', page)
        entries = self._parse_html5_media_entries(
            url, patched_page, video_id,
            m3u8_id='hls', m3u8_entry_protocol='m3u8_native')
        media = entries[0]

        json_ld_info = self._search_json_ld(page, video_id, default={})

        # Open Graph metadata; the title falls back to the display id when
        # no og:title is found.
        page_info = {
            'id': video_id,
            'display_id': display_id,
            'title': self._og_search_title(page, default=None) or display_id,
            'description': self._og_search_description(page, default=None),
            'thumbnail': self._og_search_thumbnail(page, default=None),
        }

        return merge_dicts(media, json_ld_info, page_info)