]>
Commit | Line | Data |
---|---|---|
1 | from .common import InfoExtractor | |
2 | from ..networking.exceptions import HTTPError | |
3 | from ..utils import ( | |
4 | determine_ext, | |
5 | ExtractorError, | |
6 | int_or_none, | |
7 | unified_strdate, | |
8 | ) | |
9 | ||
10 | ||
class RadioCanadaIE(InfoExtractor):
    """Extractor for Radio-Canada media identified by an app code and a numeric id.

    Accepts media console widget URLs as well as ``radiocanada:<app_code>:<id>``
    pseudo-URLs (see ``_VALID_URL``).
    """

    IE_NAME = 'radiocanada'
    _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'
    _TESTS = [
        {
            'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
            'info_dict': {
                'id': '7184272',
                'ext': 'mp4',
                'title': 'Le parcours du tireur capté sur vidéo',
                'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
                'upload_date': '20141023',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            }
        },
        {
            # empty Title
            'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7754998/',
            'info_dict': {
                'id': '7754998',
                'ext': 'mp4',
                'title': 'letelejournal22h',
                'description': 'INTEGRALE WEB 22H-TJ',
                'upload_date': '20170720',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            # with protectionType but not actually DRM protected
            'url': 'radiocanada:toutv:140872',
            'info_dict': {
                'id': '140872',
                'title': 'Épisode 1',
                'series': 'District 31',
            },
            'only_matching': True,
        }
    ]
    _GEO_COUNTRIES = ['CA']
    # Optional credentials forwarded to the API when truthy. Nothing in this
    # class assigns them (presumably a subclass does - TODO confirm).
    _access_token = None
    _claims = None

    def _call_api(self, path, video_id=None, app_code=None, query=None):
        """Query ``https://services.radio-canada.ca/media/<path>`` and return the parsed JSON.

        ``client_key`` and ``output`` are always sent. ``appCode``/``idMedia``
        are added when ``video_id`` is truthy, and ``access_token`` when
        ``self._access_token`` is set. The caller's ``query`` dict is left
        untouched.

        Raises an expected ExtractorError carrying the API's own message when
        the request fails with HTTP 401 or 422; any other failure propagates
        unchanged.
        """
        # Work on a copy so the caller's dict is never mutated.
        query = dict(query or {})
        query.update({
            'client_key': '773aea60-0e80-41bb-9c7f-e6d7c3ad17fb',
            'output': 'json',
        })
        if video_id:
            query.update({
                'appCode': app_code,
                'idMedia': video_id,
            })
        if self._access_token:
            query['access_token'] = self._access_token
        try:
            return self._download_json(
                'https://services.radio-canada.ca/media/' + path, video_id, query=query)
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status in (401, 422):
                # The error body is JSON; surface its message instead of the bare HTTP error.
                data = self._parse_json(e.cause.response.read().decode(), None)
                error = data.get('error_description') or data['errorMessage']['text']
                raise ExtractorError(error, expected=True)
            raise

    def _extract_info(self, app_code, video_id):
        """Build the info dict for ``video_id`` within ``app_code``.

        Fetches the metadata list, asks the validation endpoint for the stream
        URL, extracts the m3u8 formats and attaches French closed captions
        when the metadata provides a caption URL.

        Raises a geo-restriction / login-required error for the two known API
        messages, and an expected ExtractorError for any other message
        returned without a stream URL.
        """
        metas = self._call_api('meta/v1/index.ashx', video_id, app_code)['Metas']

        def get_meta(name):
            # Return the first non-empty 'text' of a meta entry called `name`, else None.
            for meta in metas:
                if meta.get('name') == name:
                    text = meta.get('text')
                    if text:
                        return text

        # protectionType does not necessarily mean the video is DRM protected (see
        # https://github.com/ytdl-org/youtube-dl/pull/18609).
        if get_meta('protectionType'):
            self.report_warning('This video is probably DRM protected.')

        query = {
            'connectionType': 'hd',
            'deviceType': 'ipad',
            'multibitrate': 'true',
        }
        if self._claims:
            query['claims'] = self._claims
        v_data = self._call_api('validation/v2/', video_id, app_code, query)
        v_url = v_data.get('url')
        if not v_url:
            # Without a URL the response carries a human-readable reason instead.
            error = v_data['message']
            if error == "Le contenu sélectionné n'est pas disponible dans votre pays":
                self.raise_geo_restricted(error, self._GEO_COUNTRIES)
            if error == 'Le contenu sélectionné est disponible seulement en premium':
                self.raise_login_required(error)
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, error), expected=True)
        formats = self._extract_m3u8_formats(v_url, video_id, 'mp4')

        subtitles = {}
        closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5')
        if closed_caption_url:
            subtitles['fr'] = [{
                'url': closed_caption_url,
                'ext': determine_ext(closed_caption_url, 'vtt'),
            }]

        return {
            'id': video_id,
            'title': get_meta('Title') or get_meta('AV-nomEmission'),
            'description': get_meta('Description') or get_meta('ShortDescription'),
            'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
            'duration': int_or_none(get_meta('length')),
            'series': get_meta('Emission'),
            # Look the values up in the metadata; passing the bare meta name
            # to int_or_none() could never yield a number.
            'season_number': int_or_none(get_meta('SrcSaison')),
            'episode_number': int_or_none(get_meta('SrcEpisode')),
            'upload_date': unified_strdate(get_meta('Date')),
            'subtitles': subtitles,
            'formats': formats,
        }

    def _real_extract(self, url):
        """Extract using the ``app_code`` and ``id`` groups captured by ``_VALID_URL``."""
        return self._extract_info(*self._match_valid_url(url).groups())
141 | ||
142 | ||
class RadioCanadaAudioVideoIE(InfoExtractor):
    """Turn ici.radio-canada.ca ``media-<id>`` page URLs into ``radiocanada:medianet:<id>`` results."""

    IE_NAME = 'radiocanada:audiovideo'
    _VALID_URL = r'https?://ici\.radio-canada\.ca/([^/]+/)*media-(?P<id>[0-9]+)'
    _TESTS = [
        {
            'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
            'info_dict': {
                'id': '7527184',
                'ext': 'mp4',
                'title': 'Barack Obama au Vietnam',
                'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam',
                'upload_date': '20160523',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'https://ici.radio-canada.ca/info/videos/media-7527184/barack-obama-au-vietnam',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return a URL result pointing at ``radiocanada:medianet:<id>`` for the matched id."""
        media_id = self._match_id(url)
        # No extraction happens here; the numeric id is simply re-addressed
        # under the 'medianet' app code.
        return self.url_result('radiocanada:medianet:%s' % media_id)