]>
Commit | Line | Data |
---|---|---|
3a30508b S |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | from .common import InfoExtractor | |
a0904c5d YCH |
5 | from ..utils import ( |
6 | determine_ext, | |
7 | remove_end, | |
8 | ) | |
3a30508b S |
9 | |
10 | ||
class TelegraafIE(InfoExtractor):
    """Extractor for video pages under ``telegraaf.nl/tv/``.

    The numeric path component of the page URL is used as the video id.
    """

    _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/tv/(?:[^/]+/)+(?P<id>\d+)/[^/]+\.html'
    _TEST = {
        'url': 'http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html',
        'info_dict': {
            'id': '24353229',
            'ext': 'mp4',
            'title': 'Tikibad ontruimd wegens brand',
            'description': 'md5:05ca046ff47b931f9b04855015e163a4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 33,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Resolve a Telegraaf video page into an info dict.

        The page is fetched, the first ``<iframe>`` ``src`` is followed to
        the player page, the ``playlist: "..."`` URL found there is
        downloaded as JSON, and formats are built from the first playlist
        item's ``locations`` (``adaptive`` and ``progressive`` lists).

        Returns a dict with ``id``, ``title``, ``description``, ``formats``,
        ``duration`` and ``thumbnail``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The closing quote must stay outside the capture group; otherwise
        # the extracted player URL ends with a stray '"'.
        player_url = self._html_search_regex(
            r'<iframe[^>]+src="([^"]+)"', webpage, 'player URL')
        player_page = self._download_webpage(
            player_url, video_id, note='Download player webpage')
        playlist_url = self._search_regex(
            r'playlist\s*:\s*"([^"]+)"', player_page, 'playlist URL')
        playlist_data = self._download_json(playlist_url, video_id)

        # Only the first playlist entry is used.
        item = playlist_data['items'][0]
        formats = []
        locations = item['locations']

        # Adaptive entries are manifests; dispatch on the manifest extension.
        for location in locations.get('adaptive', []):
            manifest_url = location['src']
            ext = determine_ext(manifest_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    manifest_url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
            elif ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    manifest_url, video_id, mpd_id='dash', fatal=False))
            else:
                self.report_warning('Unknown adaptive format %s' % ext)

        # Progressive entries are direct links; the first source of each is used.
        for location in locations.get('progressive', []):
            formats.append({
                'url': location['sources'][0]['src'],
                'width': location.get('width'),
                'height': location.get('height'),
                'format_id': 'http-%s' % location['label'],
            })

        self._sort_formats(formats)

        # The Open Graph title carries a ' - VIDEO' suffix that is stripped.
        title = remove_end(self._og_search_title(webpage), ' - VIDEO')
        description = self._og_search_description(webpage)
        duration = item.get('duration')
        thumbnail = item.get('poster')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'duration': duration,
            'thumbnail': thumbnail,
        }