]>
Commit | Line | Data |
---|---|---|
940b606a | 1 | # coding: utf-8 |
4cd759f7 JMF |
2 | from __future__ import unicode_literals |
3 | ||
940b606a S |
4 | import json |
5 | import re | |
4cd759f7 JMF |
6 | import time |
7 | ||
8 | from .common import InfoExtractor | |
0cf2352e | 9 | from ..compat import ( |
0cf2352e | 10 | compat_HTTPError, |
15d1e8a2 S |
11 | compat_str, |
12 | compat_urlparse, | |
0cf2352e | 13 | ) |
5448b781 | 14 | from ..utils import ( |
864a4576 | 15 | determine_ext, |
0cf2352e | 16 | ExtractorError, |
864a4576 | 17 | float_or_none, |
5448b781 | 18 | int_or_none, |
0cf2352e | 19 | remove_end, |
15d1e8a2 S |
20 | try_get, |
21 | unified_strdate, | |
864a4576 | 22 | unified_timestamp, |
5448b781 | 23 | update_url_query, |
15d1e8a2 | 24 | USER_AGENTS, |
5448b781 | 25 | ) |
940b606a | 26 | |
4cd759f7 JMF |
27 | |
class DPlayIE(InfoExtractor):
    """Extractor for video pages on www.dplay.dk, www.dplay.se and www.dplay.no.

    Pages that embed a numeric ``data-video-id`` attribute are resolved via
    the site's ``/api/v2/ajax/videos`` endpoint; every other page is handed
    over to the ``disco-api.<host>`` API (see ``_get_disco_api_info``).
    """

    _VALID_URL = r'https?://(?P<domain>www\.(?P<host>dplay\.(?P<country>dk|se|no)))/(?:video(?:er|s)/)?(?P<id>[^/]+/[^/?#]+)'

    _TESTS = [{
        # non geo restricted, via secure api, unsigned download hls URL
        'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
        'info_dict': {
            'id': '3172',
            'display_id': 'nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet',
            'ext': 'mp4',
            'title': 'Svensken lär sig njuta av livet',
            'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
            'duration': 2650,
            'timestamp': 1365454320,
            'upload_date': '20130408',
            'creator': 'Kanal 5 (Home)',
            'series': 'Nugammalt - 77 händelser som format Sverige',
            'season_number': 1,
            'episode_number': 1,
            'age_limit': 0,
        },
    }, {
        # geo restricted, via secure api, unsigned download hls URL
        'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/',
        'info_dict': {
            'id': '70816',
            'display_id': 'mig-og-min-mor/season-6-episode-12',
            'ext': 'mp4',
            'title': 'Episode 12',
            'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90',
            'duration': 2563,
            'timestamp': 1429696800,
            'upload_date': '20150422',
            'creator': 'Kanal 4 (Home)',
            'series': 'Mig og min mor',
            'season_number': 6,
            'episode_number': 12,
            'age_limit': 0,
        },
    }, {
        # geo restricted, via direct unsigned hls URL
        'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/',
        'only_matching': True,
    }, {
        # disco-api
        'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7',
        'info_dict': {
            'id': '40206',
            'display_id': 'i-kongens-klr/sesong-1-episode-7',
            'ext': 'mp4',
            'title': 'Episode 7',
            'description': 'md5:e3e1411b2b9aebeea36a6ec5d50c60cf',
            'duration': 2611.16,
            'timestamp': 1516726800,
            'upload_date': '20180123',
            'series': 'I kongens klær',
            'season_number': 1,
            'episode_number': 7,
        },
        'params': {
            'format': 'bestvideo',
            'skip_download': True,
        },
    }, {
        'url': 'https://www.dplay.dk/videoer/singleliv/season-5-episode-3',
        'only_matching': True,
    }, {
        'url': 'https://www.dplay.se/videos/sofias-anglar/sofias-anglar-1001',
        'only_matching': True,
    }]

    def _get_disco_api_info(self, url, display_id, disco_host, realm):
        """Build the info dict for a video through the disco API.

        Three requests are made against ``https://<disco_host>``: ``/token``
        (to obtain a bearer token for ``realm``), ``/content/videos/<display_id>``
        (metadata, with the show included) and
        ``/playback/videoPlaybackInfo/<video id>`` (stream URLs).

        :param url: page URL, sent as the ``Referer`` header.
        :param display_id: ``<show>/<episode>`` slug taken from the page URL.
        :param disco_host: host name of the disco API.
        :param realm: realm passed to the token endpoint.
        :return: info dict with id, metadata and sorted formats.
        """
        api_root = 'https://' + disco_host

        token_response = self._download_json(
            '%s/token' % api_root, display_id, 'Downloading token',
            query={'realm': realm})
        auth_headers = {
            'Referer': url,
            'Authorization': 'Bearer ' + token_response['data']['attributes']['token'],
        }

        video = self._download_json(
            '%s/content/videos/%s' % (api_root, display_id), display_id,
            headers=auth_headers, query={'include': 'show'})
        video_data = video['data']
        video_id = video_data['id']
        attributes = video_data['attributes']
        title = attributes['name']

        playback = self._download_json(
            '%s/playback/videoPlaybackInfo/%s' % (api_root, video_id),
            display_id, headers=auth_headers)
        streaming = playback['data']['attributes']['streaming']

        formats = []
        for format_id, format_dict in streaming.items():
            # Entries that are not dicts or carry no URL are ignored.
            format_url = (
                format_dict.get('url')
                if isinstance(format_dict, dict) else None)
            if not format_url:
                continue
            ext = determine_ext(format_url)
            if format_id == 'dash' or ext == 'mpd':
                dash_formats = self._extract_mpd_formats(
                    format_url, display_id, mpd_id='dash', fatal=False)
                formats.extend(dash_formats)
            elif format_id == 'hls' or ext == 'm3u8':
                hls_formats = self._extract_m3u8_formats(
                    format_url, display_id, 'mp4',
                    entry_protocol='m3u8_native', m3u8_id='hls',
                    fatal=False)
                formats.extend(hls_formats)
            else:
                formats.append({
                    'url': format_url,
                    'format_id': format_id,
                })
        self._sort_formats(formats)

        # The series name comes from the first included entry of type "show".
        series = None
        included = video.get('included')
        if isinstance(included, list):
            for entry in included:
                if entry.get('type') == 'show':
                    series = try_get(
                        entry, lambda x: x['attributes']['name'], compat_str)
                    break

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': attributes.get('description'),
            'duration': float_or_none(
                attributes.get('videoDuration'), scale=1000),
            'timestamp': unified_timestamp(attributes.get('publishStart')),
            'series': series,
            'season_number': int_or_none(attributes.get('seasonNumber')),
            'episode_number': int_or_none(attributes.get('episodeNumber')),
            'age_limit': int_or_none(attributes.get('minimum_age')),
            'formats': formats,
        }

    def _real_extract(self, url):
        """Extract the info dict for a dplay.dk/.se/.no video page.

        Falls back to the disco API when the page carries no numeric
        ``data-video-id``. Otherwise formats are requested from the secure
        API first and, if that yields nothing, from the manifest URLs listed
        directly in the video's metadata.
        """
        match = re.match(self._VALID_URL, url)
        display_id, domain = match.group('id', 'domain')

        self._initialize_geo_bypass({
            'countries': [match.group('country').upper()],
        })

        webpage = self._download_webpage(url, display_id)

        video_id = self._search_regex(
            r'data-video-id=["\'](\d+)', webpage, 'video id', default=None)

        if not video_id:
            host = match.group('host')
            return self._get_disco_api_info(
                url, display_id, 'disco-api.' + host, host.replace('.', ''))

        videos = self._download_json(
            'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id),
            video_id)
        info = videos['data'][0]

        title = info['title']

        PROTOCOLS = ('hls', 'hds')
        formats = []

        def collect_formats(protocol, manifest_url):
            # Append the formats found in manifest_url to the shared list.
            if protocol == 'hds':
                formats.extend(self._extract_f4m_formats(
                    manifest_url + '&hdcore=3.8.0&plugin=flowplayer-3.8.0.0',
                    video_id, f4m_id=protocol, fatal=False))
                return
            if protocol != 'hls':
                return
            hls_formats = self._extract_m3u8_formats(
                manifest_url, video_id, ext='mp4',
                entry_protocol='m3u8_native', m3u8_id=protocol, fatal=False)
            # Sometimes final URLs inside m3u8 are unsigned, so the query of
            # the manifest URL is copied onto each of them. Also fragments'
            # URLs are only served signed for Safari user agent.
            manifest_query = compat_urlparse.parse_qs(
                compat_urlparse.urlparse(manifest_url).query)
            for hls_format in hls_formats:
                hls_format['url'] = update_url_query(
                    hls_format['url'], manifest_query)
                hls_format['http_headers'] = {
                    'User-Agent': USER_AGENTS['Safari'],
                }
            formats.extend(hls_formats)

        domain_tld = domain.split('.')[-1]
        if domain_tld in ('se', 'dk', 'no'):
            for protocol in PROTOCOLS:
                # Providing dsc-geo allows to bypass geo restriction in some cases
                geo_cookie = json.dumps({
                    'countryCode': domain_tld.upper(),
                    'expiry': (time.time() + 20 * 60) * 1000,
                })
                self._set_cookie(
                    'secure.dplay.%s' % domain_tld, 'dsc-geo', geo_cookie)
                stream = self._download_json(
                    'https://secure.dplay.%s/secure/api/v2/user/authorization/stream/%s?stream_type=%s'
                    % (domain_tld, video_id, protocol), video_id,
                    'Downloading %s stream JSON' % protocol, fatal=False)
                if stream and stream.get(protocol):
                    collect_formats(protocol, stream[protocol])

        # The last resort is to try direct unsigned hls/hds URLs from info dictionary.
        # Sometimes this does work even when secure API with dsc-geo has failed (e.g.
        # http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/).
        if not formats:
            for protocol in PROTOCOLS:
                if info.get(protocol):
                    collect_formats(protocol, info[protocol])

        self._sort_formats(formats)

        # Subtitle URLs are stored under keys named subtitles_<lang>_<format>.
        subtitles = {}
        for lang in ('se', 'sv', 'da', 'nl', 'no'):
            for format_id in ('web_vtt', 'vtt', 'srt'):
                subtitle_url = info.get('subtitles_%s_%s' % (lang, format_id))
                if not subtitle_url:
                    continue
                subtitles.setdefault(lang, []).append({'url': subtitle_url})

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': info.get('video_metadata_longDescription'),
            'duration': int_or_none(info.get('video_metadata_length'), scale=1000),
            'timestamp': int_or_none(info.get('video_publish_date')),
            'creator': info.get('video_metadata_homeChannel'),
            'series': info.get('video_metadata_show'),
            'season_number': int_or_none(info.get('season')),
            'episode_number': int_or_none(info.get('episode')),
            'age_limit': int_or_none(info.get('minimum_age')),
            'formats': formats,
            'subtitles': subtitles,
        }
0cf2352e S |
268 | |
269 | ||
class DPlayItIE(InfoExtractor):
    """Extractor for episode pages on it.dplay.com.

    Playback info is taken from the ``playback_json`` blob embedded in the
    page when present; otherwise it is downloaded from the
    ``/playback/videoPlaybackInfo/<id>`` URL found in the page, authorized
    with the ``dplayit_token`` cookie.
    """

    _VALID_URL = r'https?://it\.dplay\.com/[^/]+/[^/]+/(?P<id>[^/?#]+)'
    _GEO_COUNTRIES = ['IT']
    _TEST = {
        'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/',
        'md5': '2b808ffb00fc47b884a172ca5d13053c',
        'info_dict': {
            'id': '6918',
            'display_id': 'luigi-di-maio-la-psicosi-di-stanislawskij',
            'ext': 'mp4',
            'title': 'Biografie imbarazzanti: Luigi Di Maio: la psicosi di Stanislawskij',
            'description': 'md5:3c7a4303aef85868f867a26f5cc14813',
            'thumbnail': r're:^https?://.*\.jpe?g',
            'upload_date': '20160524',
            'series': 'Biografie imbarazzanti',
            'season_number': 1,
            'episode': 'Luigi Di Maio: la psicosi di Stanislawskij',
            'episode_number': 1,
        },
    }

    def _real_extract(self, url):
        """Extract the info dict for an it.dplay.com episode page.

        :raises ExtractorError: when the playback info has to be downloaded
            but the ``dplayit_token`` cookie is missing, or when the API
            answers 400/403 with an error payload (geo restriction is
            reported through ``raise_geo_restricted``).
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        title = remove_end(self._og_search_title(webpage), ' | Dplay')

        video_id = None

        info = self._search_regex(
            r'playback_json\s*:\s*JSON\.parse\s*\(\s*("(?:\\.|[^"\\])+?")',
            webpage, 'playback JSON', default=None)
        if info:
            # The captured value is a quoted JSON string literal that itself
            # holds JSON, hence two decoding passes.
            for _ in range(2):
                info = self._parse_json(info, display_id, fatal=False)
                if not info:
                    break
            else:
                # Only reached when both passes produced a truthy value.
                video_id = try_get(info, lambda x: x['data']['id'])

        if not info:
            info_url = self._search_regex(
                r'url\s*[:=]\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
                webpage, 'info url')

            video_id = info_url.rpartition('/')[-1]

            # A missing cookie makes .get() return None; report that clearly
            # instead of failing with AttributeError on `.value`.
            token_cookie = self._get_cookies(url).get('dplayit_token')
            if token_cookie is None:
                raise ExtractorError(
                    'Unable to find the dplayit_token cookie required to '
                    'download playback info')

            try:
                info = self._download_json(
                    info_url, display_id, headers={
                        'Authorization': 'Bearer %s' % token_cookie.value,
                        'Referer': url,
                    })
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403):
                    # The response body of these errors describes the failure.
                    info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
                    error = info['errors'][0]
                    if error.get('code') == 'access.denied.geoblocked':
                        self.raise_geo_restricted(
                            msg=error.get('detail'), countries=self._GEO_COUNTRIES)
                    raise ExtractorError(error['detail'], expected=True)
                raise

        hls_url = info['data']['attributes']['streaming']['hls']['url']

        formats = self._extract_m3u8_formats(
            hls_url, display_id, ext='mp4', entry_protocol='m3u8_native',
            m3u8_id='hls')

        series = self._html_search_regex(
            r'(?s)<h1[^>]+class=["\'].*?\bshow_title\b.*?["\'][^>]*>(.+?)</h1>',
            webpage, 'series', fatal=False)
        episode = self._search_regex(
            r'<p[^>]+class=["\'].*?\bdesc_ep\b.*?["\'][^>]*>\s*<br/>\s*<b>([^<]+)',
            webpage, 'episode', fatal=False)

        # Season number, episode number and upload date share one "dates" span.
        season_number = episode_number = upload_date = None
        mobj = re.search(
            r'(?s)<span[^>]+class=["\']dates["\'][^>]*>.+?\bS\.(?P<season_number>\d+)\s+E\.(?P<episode_number>\d+)\s*-\s*(?P<upload_date>\d{2}/\d{2}/\d{4})',
            webpage)
        if mobj:
            season_number = int(mobj.group('season_number'))
            episode_number = int(mobj.group('episode_number'))
            upload_date = unified_strdate(mobj.group('upload_date'))

        return {
            # Fall back to the display id when no video id was found.
            'id': compat_str(video_id or display_id),
            'display_id': display_id,
            'title': title,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'series': series,
            'season_number': season_number,
            'episode': episode,
            'episode_number': episode_number,
            'upload_date': upload_date,
            'formats': formats,
        }