]>
Commit | Line | Data |
---|---|---|
be2d40a5 TG |
1 | from __future__ import unicode_literals |
2 | ||
bb5ebd44 S |
3 | import re |
4 | ||
be2d40a5 | 5 | from .common import InfoExtractor |
117589df S |
6 | from ..utils import ( |
7 | float_or_none, | |
8 | strip_or_none, | |
9 | ) | |
be2d40a5 TG |
10 | |
11 | ||
class CanvasIE(InfoExtractor):
    """Extractor for asset URLs of the mediazone.vrt.be API.

    The URL pattern accepts the ``canvas``, ``een`` and ``ketnet`` sites and
    asset ids starting with ``md-ast-`` or ``mz-ast-``.
    """

    _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet)/assets/(?P<id>m[dz]-ast-[^/?#&]+)'
    _TESTS = [{
        'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
        'md5': '90139b746a0a9bd7bb631283f6e2a64e',
        'info_dict': {
            'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
            'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
            'ext': 'flv',
            'title': 'Nachtwacht: De Greystook',
            'description': 'md5:1db3f5dc4c7109c821261e7512975be7',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 1468.03,
        },
        'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
    }, {
        'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the asset JSON for ``url`` and build the info dict.

        The site id and asset id are taken from the URL itself. Every entry
        of the JSON's ``targetUrls`` list that has both a ``url`` and a
        ``type`` contributes formats; subtitle entries of type ``CLOSED`` are
        collected under the ``nl`` language key.
        """
        match = re.match(self._VALID_URL, url)
        site_id = match.group('site_id')
        video_id = match.group('id')

        asset_url = 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (site_id, video_id)
        data = self._download_json(asset_url, video_id)

        # 'title' is looked up with [] (mandatory), 'description' with .get().
        title = data['title']
        description = data.get('description')

        formats = []
        for target in data['targetUrls']:
            format_url = target.get('url')
            format_type = target.get('type')
            if not (format_url and format_type):
                continue
            # Manifest-based types are expanded by the matching helper (all
            # called with fatal=False); any other type is kept as a single
            # direct-URL format.
            if format_type == 'HLS':
                found = self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id=format_type, fatal=False)
            elif format_type == 'HDS':
                found = self._extract_f4m_formats(
                    format_url, video_id, f4m_id=format_type, fatal=False)
            elif format_type == 'MPEG_DASH':
                found = self._extract_mpd_formats(
                    format_url, video_id, mpd_id=format_type, fatal=False)
            elif format_type == 'HSS':
                found = self._extract_ism_formats(
                    format_url, video_id, ism_id='mss', fatal=False)
            else:
                found = [{
                    'format_id': format_type,
                    'url': format_url,
                }]
            formats.extend(found)
        self._sort_formats(formats)

        # Only a list-typed 'subtitleUrls' value is inspected; the 'nl' key is
        # created only when at least one CLOSED subtitle with a URL exists.
        closed_subtitles = []
        subtitle_urls = data.get('subtitleUrls')
        if isinstance(subtitle_urls, list):
            for entry in subtitle_urls:
                subtitle_url = entry.get('url')
                if not subtitle_url or entry.get('type') != 'CLOSED':
                    continue
                closed_subtitles.append({'url': subtitle_url})
        subtitles = {'nl': closed_subtitles} if closed_subtitles else {}

        return {
            'id': video_id,
            'display_id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            # NOTE(review): 1000 is presumably a scale turning milliseconds
            # into seconds - confirm against float_or_none in ..utils.
            'duration': float_or_none(data.get('duration'), 1000),
            'thumbnail': data.get('posterImageUrl'),
            'subtitles': subtitles,
        }
86 | ||
87 | ||
class CanvasEenIE(InfoExtractor):
    """Extractor for video pages on canvas.be and een.be.

    The page is only used to find the asset id and page-level metadata; the
    media itself is delegated to ``CanvasIE`` through a ``url_transparent``
    result.
    """

    IE_DESC = 'canvas.be and een.be'
    _VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
        'md5': 'ed66976748d12350b118455979cca293',
        'info_dict': {
            'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
            'display_id': 'de-afspraak-veilt-voor-de-warmste-week',
            'ext': 'flv',
            'title': 'De afspraak veilt voor de Warmste Week',
            'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 49.02,
        },
        'expected_warnings': ['is not a supported codec'],
    }, {
        # with subtitles
        'url': 'http://www.canvas.be/video/panorama/2016/pieter-0167',
        'info_dict': {
            'id': 'mz-ast-5240ff21-2d30-4101-bba6-92b5ec67c625',
            'display_id': 'pieter-0167',
            'ext': 'mp4',
            'title': 'Pieter 0167',
            'description': 'md5:943cd30f48a5d29ba02c3a104dc4ec4e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 2553.08,
            'subtitles': {
                'nl': [{
                    'ext': 'vtt',
                }],
            },
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Pagina niet gevonden',
    }, {
        'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles',
        'info_dict': {
            'id': 'mz-ast-11a587f8-b921-4266-82e2-0bce3e80d07f',
            'display_id': 'herbekijk-sorry-voor-alles',
            'ext': 'mp4',
            'title': 'Herbekijk Sorry voor alles',
            'description': 'md5:8bb2805df8164e5eb95d6a7a29dc0dd3',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 3788.06,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Episode no longer available',
    }, {
        'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Scrape the page at ``url`` and return a ``url_transparent`` result.

        The site id and display id come from the URL; the asset id is read
        from the page's ``data-video`` attribute. The returned dict points at
        the mediazone.vrt.be asset URL and names ``CanvasIE`` as the
        extractor to handle it.
        """
        match = re.match(self._VALID_URL, url)
        site_id = match.group('site_id')
        display_id = match.group('id')

        webpage = self._download_webpage(url, display_id)

        # Prefer the page's <h1> heading; fall back to the Open Graph title.
        # Both lookups pass default=None, so the title may end up as None.
        raw_title = self._search_regex(
            r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
            webpage, 'title', default=None)
        if not raw_title:
            raw_title = self._og_search_title(webpage, default=None)
        title = strip_or_none(raw_title)

        # The quote character captured in group 1 delimits the attribute
        # value; only the named group 'id' is returned.
        video_id = self._html_search_regex(
            r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
            group='id')

        asset_url = 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (site_id, video_id)
        description = self._og_search_description(webpage)

        return {
            '_type': 'url_transparent',
            'url': asset_url,
            'ie_key': CanvasIE.ie_key(),
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
        }