]>
Commit | Line | Data |
---|---|---|
be2d40a5 TG |
1 | from __future__ import unicode_literals |
2 | ||
bb5ebd44 S |
3 | import re |
4 | ||
be2d40a5 | 5 | from .common import InfoExtractor |
4e2743ab | 6 | from ..utils import float_or_none |
be2d40a5 TG |
7 | |
8 | ||
class CanvasIE(InfoExtractor):
    """Extractor for video pages hosted on canvas.be and een.be.

    The page is scraped for a title and a ``data-video`` asset id; the asset
    metadata (stream targets, subtitles, duration, poster image) is then
    requested from the mediazone.vrt.be JSON API.
    """

    # Captures the site (``canvas`` or ``een``) and the last path segment,
    # which is used as the display id.
    _VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [
        {
            'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
            'md5': 'ea838375a547ac787d4064d8c7860a6c',
            'info_dict': {
                'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
                'display_id': 'de-afspraak-veilt-voor-de-warmste-week',
                'ext': 'mp4',
                'title': 'De afspraak veilt voor de Warmste Week',
                'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6',
                'thumbnail': r're:^https?://.*\.jpg$',
                'duration': 49.02,
            }
        },
        {
            # with subtitles
            'url': 'http://www.canvas.be/video/panorama/2016/pieter-0167',
            'info_dict': {
                'id': 'mz-ast-5240ff21-2d30-4101-bba6-92b5ec67c625',
                'display_id': 'pieter-0167',
                'ext': 'mp4',
                'title': 'Pieter 0167',
                'description': 'md5:943cd30f48a5d29ba02c3a104dc4ec4e',
                'thumbnail': r're:^https?://.*\.jpg$',
                'duration': 2553.08,
                'subtitles': {
                    'nl': [{
                        'ext': 'vtt',
                    }],
                },
            },
            'params': {
                'skip_download': True,
            }
        },
        {
            'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles',
            'info_dict': {
                'id': 'mz-ast-11a587f8-b921-4266-82e2-0bce3e80d07f',
                'display_id': 'herbekijk-sorry-voor-alles',
                'ext': 'mp4',
                'title': 'Herbekijk Sorry voor alles',
                'description': 'md5:8bb2805df8164e5eb95d6a7a29dc0dd3',
                'thumbnail': r're:^https?://.*\.jpg$',
                'duration': 3788.06,
            },
            'params': {
                'skip_download': True,
            }
        },
        {
            'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        Downloads the page, reads the title and the ``data-video`` asset id
        from it, queries the mediazone API for that asset and turns the
        returned ``targetUrls`` / ``subtitleUrls`` into formats and subtitles.
        """
        url_match = re.match(self._VALID_URL, url)
        site_id = url_match.group('site_id')
        display_id = url_match.group('id')

        webpage = self._download_webpage(url, display_id)

        # Prefer the on-page <h1> heading; fall back to the Open Graph title
        # only when the heading is absent or empty.
        raw_title = self._search_regex(
            r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
            webpage, 'title', default=None)
        if not raw_title:
            raw_title = self._og_search_title(webpage)
        title = raw_title.strip()

        # The attribute value may be wrapped in either quote character; the
        # back-reference makes sure the closing quote matches the opening one.
        video_id = self._html_search_regex(
            r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1',
            webpage, 'video id', group='id')

        api_url = 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (
            site_id, video_id)
        data = self._download_json(api_url, display_id)

        formats = []
        for stream in data['targetUrls']:
            stream_url = stream.get('url')
            stream_type = stream.get('type')
            # Entries lacking either field cannot be turned into a format.
            if not (stream_url and stream_type):
                continue
            if stream_type == 'HLS':
                formats.extend(self._extract_m3u8_formats(
                    stream_url, display_id, entry_protocol='m3u8_native',
                    ext='mp4', preference=0, fatal=False,
                    m3u8_id=stream_type))
                continue
            if stream_type == 'HDS':
                formats.extend(self._extract_f4m_formats(
                    stream_url, display_id, f4m_id=stream_type, fatal=False))
                continue
            if stream_type == 'MPEG_DASH':
                formats.extend(self._extract_mpd_formats(
                    stream_url, display_id, mpd_id=stream_type, fatal=False))
                continue
            # Any other type is passed through as a plain direct URL.
            formats.append({
                'format_id': stream_type,
                'url': stream_url,
            })
        self._sort_formats(formats)

        # Only subtitle entries typed 'CLOSED' are kept; they are all filed
        # under the 'nl' language key.
        subtitles = {}
        subtitle_entries = data.get('subtitleUrls')
        if isinstance(subtitle_entries, list):
            nl_tracks = []
            for entry in subtitle_entries:
                track_url = entry.get('url')
                if track_url and entry.get('type') == 'CLOSED':
                    nl_tracks.append({'url': track_url})
            if nl_tracks:
                subtitles['nl'] = nl_tracks

        description = self._og_search_description(webpage)
        # float_or_none is given 1000 as its second argument -- presumably a
        # scale converting milliseconds to seconds; confirm against ..utils.
        duration = float_or_none(data.get('duration'), 1000)
        thumbnail = data.get('posterImageUrl')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'formats': formats,
            'duration': duration,
            'thumbnail': thumbnail,
            'subtitles': subtitles,
        }