]>
Commit | Line | Data |
---|---|---|
444417ed | 1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..utils import ( | |
8 | xpath_text, | |
9 | find_xpath_attr, | |
10 | determine_ext, | |
11 | int_or_none, | |
12 | unified_strdate, | |
13 | xpath_element, | |
14 | ExtractorError, | |
01a0c511 | 15 | determine_protocol, |
98b7506e | 16 | unsmuggle_url, |
444417ed | 17 | ) |
18 | ||
19 | ||
class RadioCanadaIE(InfoExtractor):
    """Extractor for Radio-Canada media console items.

    Handles both the internal ``radiocanada:<app_code>:<id>`` scheme and
    ``ici.radio-canada.ca/widgets/mediaconsole/<app_code>/<id>`` URLs.
    """

    IE_NAME = 'radiocanada'
    _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'
    _TESTS = [
        {
            'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
            'info_dict': {
                'id': '7184272',
                'ext': 'mp4',
                'title': 'Le parcours du tireur capté sur vidéo',
                'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
                'upload_date': '20141023',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            }
        },
        {
            # empty Title
            'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7754998/',
            'info_dict': {
                'id': '7754998',
                'ext': 'mp4',
                'title': 'letelejournal22h',
                'description': 'INTEGRALE WEB 22H-TJ',
                'upload_date': '20170720',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        }
    ]

    def _real_extract(self, url):
        """Return the info dict for the media item identified by *url*.

        Downloads the metadata XML, then queries the validation service
        once per device type and collects every format it yields.

        Raises:
            ExtractorError: if the metadata carries a ``protectionType``
                entry, or if no format was found and the validation
                service reported a message.
        """
        url, smuggled_data = unsmuggle_url(url, {})
        app_code, video_id = re.match(self._VALID_URL, url).groups()

        metadata = self._download_xml(
            'http://api.radio-canada.ca/metaMedia/v1/index.ashx',
            video_id, note='Downloading metadata XML', query={
                'appCode': app_code,
                'idMedia': video_id,
            })

        def get_meta(name):
            # Text of the <Meta name="..."> element, or None when absent.
            el = find_xpath_attr(metadata, './/Meta', 'name', name)
            return el.text if el is not None else None

        if get_meta('protectionType'):
            raise ExtractorError('This video is DRM protected.', expected=True)

        device_types = ['ipad']
        if not smuggled_data:
            device_types.append('flash')
            device_types.append('android')

        formats = []
        # Last message returned by the validation service alongside a
        # 'null' URL; only surfaced if no format could be extracted at all.
        error = None
        # TODO: extract f4m formats
        # f4m formats can be extracted using flashhd device_type but they produce unplayable file
        for device_type in device_types:
            validation_url = 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx'
            query = {
                'appCode': app_code,
                'idMedia': video_id,
                'connectionType': 'broadband',
                'multibitrate': 'true',
                'deviceType': device_type,
            }
            if smuggled_data:
                validation_url = 'https://services.radio-canada.ca/media/validation/v2/'
                query.update(smuggled_data)
            else:
                query.update({
                    # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction
                    'paysJ391wsHjbOJwvCs26toz': 'CA',
                    'bypasslock': 'NZt5K62gRqfc',
                })
            v_data = self._download_xml(
                validation_url, video_id,
                note='Downloading %s XML' % device_type,
                query=query, fatal=False)
            # The download is non-fatal, so make sure an element actually
            # came back before querying it. Identity checks are used because
            # a childless Element is itself falsy.
            if v_data is None or v_data is False:
                continue
            v_url = xpath_text(v_data, 'url')
            if not v_url:
                continue
            if v_url == 'null':
                error = xpath_text(v_data, 'message')
                continue
            ext = determine_ext(v_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    v_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    v_url, video_id, f4m_id='hds', fatal=False))
            else:
                bitrates = xpath_element(v_data, 'bitrates')
                if bitrates is None:
                    # No per-bitrate listing to expand for this device type.
                    continue
                for url_e in bitrates.findall('url'):
                    tbr = int_or_none(url_e.get('bitrate'))
                    if not tbr:
                        continue
                    # Substitute this entry's bitrate into the template URL.
                    f_url = re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url)
                    protocol = determine_protocol({'url': f_url})
                    f = {
                        'format_id': '%s-%d' % (protocol, tbr),
                        'url': f_url,
                        'ext': 'flv' if protocol == 'rtmp' else ext,
                        'protocol': protocol,
                        'width': int_or_none(url_e.get('width')),
                        'height': int_or_none(url_e.get('height')),
                        'tbr': tbr,
                    }
                    # For rtmp URLs, split the play path out of the URL and
                    # keep the auth query string on the connection URL.
                    mobj = re.match(r'(?P<url>rtmp://[^/]+/[^/]+)/(?P<playpath>[^?]+)(?P<auth>\?.+)', f_url)
                    if mobj:
                        f.update({
                            'url': mobj.group('url') + mobj.group('auth'),
                            'play_path': mobj.group('playpath'),
                        })
                    formats.append(f)
                    if protocol == 'rtsp':
                        # Also try HLS/HDS manifests under the same path
                        # served over http.
                        base_url = self._search_regex(
                            r'rtsp://([^?]+)', f_url, 'base url', default=None)
                        if base_url:
                            base_url = 'http://' + base_url
                            formats.extend(self._extract_m3u8_formats(
                                base_url + '/playlist.m3u8', video_id, 'mp4',
                                'm3u8_native', m3u8_id='hls', fatal=False))
                            formats.extend(self._extract_f4m_formats(
                                base_url + '/manifest.f4m', video_id,
                                f4m_id='hds', fatal=False))
        if not formats and error:
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, error), expected=True)
        self._sort_formats(formats)

        subtitles = {}
        closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5')
        if closed_caption_url:
            subtitles['fr'] = [{
                'url': closed_caption_url,
                'ext': determine_ext(closed_caption_url, 'vtt'),
            }]

        return {
            'id': video_id,
            'title': get_meta('Title') or get_meta('AV-nomEmission'),
            'description': get_meta('Description') or get_meta('ShortDescription'),
            'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
            'duration': int_or_none(get_meta('length')),
            'series': get_meta('Emission'),
            # Look the values up in the metadata; passing the bare meta name
            # to int_or_none can never produce a number.
            'season_number': int_or_none(get_meta('SrcSaison')),
            'episode_number': int_or_none(get_meta('SrcEpisode')),
            'upload_date': unified_strdate(get_meta('Date')),
            'subtitles': subtitles,
            'formats': formats,
        }
176 | ||
177 | ||
class RadioCanadaAudioVideoIE(InfoExtractor):
    """Extractor for ``ici.radio-canada.ca/audio-video/media-<id>`` pages.

    Performs no extraction itself: it rewrites the page URL into a
    ``radiocanada:medianet:<id>`` URL and hands it off via ``url_result``.
    """

    # Previously a bare string literal, which only acted as a docstring and
    # left the extractor without an explicit name.
    IE_NAME = 'radiocanada:audiovideo'
    _VALID_URL = r'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
        'info_dict': {
            'id': '7527184',
            'ext': 'mp4',
            'title': 'Barack Obama au Vietnam',
            'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam',
            'upload_date': '20160523',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Delegate to the ``radiocanada:medianet:<id>`` URL for this page."""
        return self.url_result('radiocanada:medianet:%s' % self._match_id(url))