]>
Commit | Line | Data |
---|---|---|
940b606a | 1 | # coding: utf-8 |
4cd759f7 JMF |
2 | from __future__ import unicode_literals |
3 | ||
940b606a S |
4 | import json |
5 | import re | |
4cd759f7 JMF |
6 | import time |
7 | ||
8 | from .common import InfoExtractor | |
0cf2352e | 9 | from ..compat import ( |
0cf2352e | 10 | compat_HTTPError, |
15d1e8a2 S |
11 | compat_str, |
12 | compat_urlparse, | |
0cf2352e | 13 | ) |
5448b781 | 14 | from ..utils import ( |
864a4576 | 15 | determine_ext, |
0cf2352e | 16 | ExtractorError, |
864a4576 | 17 | float_or_none, |
5448b781 | 18 | int_or_none, |
0cf2352e | 19 | remove_end, |
15d1e8a2 S |
20 | try_get, |
21 | unified_strdate, | |
864a4576 | 22 | unified_timestamp, |
5448b781 | 23 | update_url_query, |
15d1e8a2 | 24 | USER_AGENTS, |
5448b781 | 25 | ) |
940b606a | 26 | |
4cd759f7 JMF |
27 | |
class DPlayIE(InfoExtractor):
    """Extractor for video pages on www.dplay.dk, www.dplay.se and www.dplay.no.

    Two back-ends are supported:

    * the legacy API, used when the web page embeds a numeric
      ``data-video-id`` attribute;
    * the "disco-api" (``https://disco-api.<host>``), used when it does not.
    """

    _VALID_URL = r'https?://(?P<domain>www\.(?P<host>dplay\.(?P<country>dk|se|no)))/(?:video(?:er|s)/)?(?P<id>[^/]+/[^/?#]+)'

    _TESTS = [{
        # non geo restricted, via secure api, unsigned download hls URL
        'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
        'info_dict': {
            'id': '3172',
            'display_id': 'nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet',
            'ext': 'mp4',
            'title': 'Svensken lär sig njuta av livet',
            'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
            'duration': 2650,
            'timestamp': 1365454320,
            'upload_date': '20130408',
            'creator': 'Kanal 5 (Home)',
            'series': 'Nugammalt - 77 händelser som format Sverige',
            'season_number': 1,
            'episode_number': 1,
            'age_limit': 0,
        },
    }, {
        # geo restricted, via secure api, unsigned download hls URL
        'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/',
        'info_dict': {
            'id': '70816',
            'display_id': 'mig-og-min-mor/season-6-episode-12',
            'ext': 'mp4',
            'title': 'Episode 12',
            'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90',
            'duration': 2563,
            'timestamp': 1429696800,
            'upload_date': '20150422',
            'creator': 'Kanal 4 (Home)',
            'series': 'Mig og min mor',
            'season_number': 6,
            'episode_number': 12,
            'age_limit': 0,
        },
    }, {
        # geo restricted, via direct unsigned hls URL
        'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/',
        'only_matching': True,
    }, {
        # disco-api
        'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7',
        'info_dict': {
            'id': '40206',
            'display_id': 'i-kongens-klr/sesong-1-episode-7',
            'ext': 'mp4',
            'title': 'Episode 7',
            'description': 'md5:e3e1411b2b9aebeea36a6ec5d50c60cf',
            'duration': 2611.16,
            'timestamp': 1516726800,
            'upload_date': '20180123',
            'series': 'I kongens klær',
            'season_number': 1,
            'episode_number': 7,
        },
        'params': {
            'format': 'bestvideo',
            'skip_download': True,
        },
    }, {
        'url': 'https://www.dplay.dk/videoer/singleliv/season-5-episode-3',
        'only_matching': True,
    }, {
        'url': 'https://www.dplay.se/videos/sofias-anglar/sofias-anglar-1001',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the video page at ``url``.

        The page is downloaded and searched for a numeric ``data-video-id``.
        When one is present the legacy API is used; otherwise extraction
        goes through the disco-api.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id')
        domain = mobj.group('domain')

        # Geo bypass is set up for the country taken from the URL's TLD.
        self._initialize_geo_bypass({
            'countries': [mobj.group('country').upper()],
        })

        webpage = self._download_webpage(url, display_id)

        video_id = self._search_regex(
            r'data-video-id=["\'](\d+)', webpage, 'video id', default=None)

        if not video_id:
            return self._extract_disco_api(url, display_id, mobj.group('host'))
        return self._extract_legacy_api(video_id, display_id, domain)

    def _extract_disco_api(self, url, display_id, host):
        """Build the info dict from ``https://disco-api.<host>``.

        ``url`` is the original page URL (sent as Referer), ``display_id``
        the ``<show>/<episode>`` path from the URL and ``host`` the
        ``dplay.<tld>`` part of the domain.
        """
        disco_base = 'https://disco-api.%s' % host
        # The token response body is discarded; the request is made before
        # the content requests (presumably it establishes the session the
        # later calls rely on - TODO confirm).
        self._download_json(
            '%s/token' % disco_base, display_id, 'Downloading token',
            query={
                'realm': host.replace('.', ''),
            })
        video = self._download_json(
            '%s/content/videos/%s' % (disco_base, display_id), display_id,
            headers={
                'Referer': url,
                'x-disco-client': 'WEB:UNKNOWN:dplay-client:0.0.1',
            }, query={
                'include': 'show'
            })
        video_id = video['data']['id']
        info = video['data']['attributes']
        title = info['name']

        streaming = self._download_json(
            '%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id),
            display_id)['data']['attributes']['streaming']

        formats = []
        for format_id, format_dict in streaming.items():
            if not isinstance(format_dict, dict):
                continue
            format_url = format_dict.get('url')
            if not format_url:
                continue
            ext = determine_ext(format_url)
            if format_id == 'dash' or ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    format_url, display_id, mpd_id='dash', fatal=False))
            elif format_id == 'hls' or ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    format_url, display_id, 'mp4',
                    entry_protocol='m3u8_native', m3u8_id='hls',
                    fatal=False))
            else:
                formats.append({
                    'url': format_url,
                    'format_id': format_id,
                })
        self._sort_formats(formats)

        # The series name is optional metadata: a missing or malformed
        # "included" list must never abort the extraction.
        series = None
        included = video.get('included')
        if isinstance(included, list):
            show = next(
                (e for e in included
                 if isinstance(e, dict) and e.get('type') == 'show'),
                None)
            series = try_get(
                show, lambda x: x['attributes']['name'], compat_str)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': info.get('description'),
            # videoDuration is scaled down by 1000 (presumably ms -> s)
            'duration': float_or_none(
                info.get('videoDuration'), scale=1000),
            'timestamp': unified_timestamp(info.get('publishStart')),
            'series': series,
            'season_number': int_or_none(info.get('seasonNumber')),
            'episode_number': int_or_none(info.get('episodeNumber')),
            'age_limit': int_or_none(info.get('minimum_age')),
            'formats': formats,
        }

    def _extract_manifest_formats(self, protocol, manifest_url, video_id):
        """Return the formats found in an 'hls' or 'hds' manifest URL.

        Any other ``protocol`` value yields an empty list. Manifest
        download failures are non-fatal.
        """
        if protocol == 'hls':
            m3u8_formats = self._extract_m3u8_formats(
                manifest_url, video_id, ext='mp4',
                entry_protocol='m3u8_native', m3u8_id=protocol, fatal=False)
            # Sometimes final URLs inside m3u8 are unsigned, let's fix this
            # ourselves. Also fragments' URLs are only served signed for
            # Safari user agent.
            query = compat_urlparse.parse_qs(
                compat_urlparse.urlparse(manifest_url).query)
            for m3u8_format in m3u8_formats:
                m3u8_format.update({
                    'url': update_url_query(m3u8_format['url'], query),
                    'http_headers': {
                        'User-Agent': USER_AGENTS['Safari'],
                    },
                })
            return m3u8_formats
        if protocol == 'hds':
            return self._extract_f4m_formats(
                manifest_url + '&hdcore=3.8.0&plugin=flowplayer-3.8.0.0',
                video_id, f4m_id=protocol, fatal=False)
        return []

    def _extract_legacy_api(self, video_id, display_id, domain):
        """Build the info dict from the legacy ajax and secure APIs.

        ``video_id`` is the numeric id found in the web page, ``display_id``
        the ``<show>/<episode>`` path from the URL and ``domain`` the full
        ``www.dplay.<tld>`` host name.
        """
        info = self._download_json(
            'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id),
            video_id)['data'][0]

        title = info['title']

        PROTOCOLS = ('hls', 'hds')
        formats = []

        domain_tld = domain.split('.')[-1]
        if domain_tld in ('se', 'dk', 'no'):
            for protocol in PROTOCOLS:
                # Providing dsc-geo allows to bypass geo restriction in some cases
                self._set_cookie(
                    'secure.dplay.%s' % domain_tld, 'dsc-geo',
                    json.dumps({
                        'countryCode': domain_tld.upper(),
                        'expiry': (time.time() + 20 * 60) * 1000,
                    }))
                stream = self._download_json(
                    'https://secure.dplay.%s/secure/api/v2/user/authorization/stream/%s?stream_type=%s'
                    % (domain_tld, video_id, protocol), video_id,
                    'Downloading %s stream JSON' % protocol, fatal=False)
                if stream and stream.get(protocol):
                    formats.extend(self._extract_manifest_formats(
                        protocol, stream[protocol], video_id))

        # The last resort is to try direct unsigned hls/hds URLs from info dictionary.
        # Sometimes this does work even when secure API with dsc-geo has failed (e.g.
        # http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/).
        if not formats:
            for protocol in PROTOCOLS:
                if info.get(protocol):
                    formats.extend(self._extract_manifest_formats(
                        protocol, info[protocol], video_id))

        self._sort_formats(formats)

        # Subtitle URLs live under flat "subtitles_<lang>_<format>" keys.
        subtitles = {}
        for lang in ('se', 'sv', 'da', 'nl', 'no'):
            for format_id in ('web_vtt', 'vtt', 'srt'):
                subtitle_url = info.get('subtitles_%s_%s' % (lang, format_id))
                if subtitle_url:
                    subtitles.setdefault(lang, []).append({'url': subtitle_url})

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': info.get('video_metadata_longDescription'),
            'duration': int_or_none(info.get('video_metadata_length'), scale=1000),
            'timestamp': int_or_none(info.get('video_publish_date')),
            'creator': info.get('video_metadata_homeChannel'),
            'series': info.get('video_metadata_show'),
            'season_number': int_or_none(info.get('season')),
            'episode_number': int_or_none(info.get('episode')),
            'age_limit': int_or_none(info.get('minimum_age')),
            'formats': formats,
            'subtitles': subtitles,
        }
0cf2352e S |
263 | |
264 | ||
class DPlayItIE(InfoExtractor):
    """Extractor for episode pages on it.dplay.com.

    Playback info is taken from the ``playback_json`` blob embedded in the
    page when available, otherwise it is downloaded from the
    ``videoPlaybackInfo`` URL referenced by the page.
    """

    _VALID_URL = r'https?://it\.dplay\.com/[^/]+/[^/]+/(?P<id>[^/?#]+)'
    _GEO_COUNTRIES = ['IT']
    _TEST = {
        'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/',
        'md5': '2b808ffb00fc47b884a172ca5d13053c',
        'info_dict': {
            'id': '6918',
            'display_id': 'luigi-di-maio-la-psicosi-di-stanislawskij',
            'ext': 'mp4',
            'title': 'Biografie imbarazzanti: Luigi Di Maio: la psicosi di Stanislawskij',
            'description': 'md5:3c7a4303aef85868f867a26f5cc14813',
            'thumbnail': r're:^https?://.*\.jpe?g',
            'upload_date': '20160524',
            'series': 'Biografie imbarazzanti',
            'season_number': 1,
            'episode': 'Luigi Di Maio: la psicosi di Stanislawskij',
            'episode_number': 1,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the episode page at ``url``.

        Raises a geo-restriction error when the playback endpoint answers
        with ``access.denied.geoblocked``, and an expected ExtractorError
        carrying the server's ``detail`` message for other HTTP 400/403
        error payloads.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        title = remove_end(self._og_search_title(webpage), ' | Dplay')

        video_id = None

        info = self._search_regex(
            r'playback_json\s*:\s*JSON\.parse\s*\(\s*("(?:\\.|[^"\\])+?")',
            webpage, 'playback JSON', default=None)
        if info:
            # The blob is a JSON string literal that itself contains JSON,
            # hence two decoding passes.
            for _ in range(2):
                info = self._parse_json(info, display_id, fatal=False)
                if not info:
                    break
            else:
                video_id = try_get(info, lambda x: x['data']['id'])

        if not info:
            info_url = self._search_regex(
                r'url\s*[:=]\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
                webpage, 'info url')

            video_id = info_url.rpartition('/')[-1]

            headers = {'Referer': url}
            # The token cookie may be absent; only send the Authorization
            # header when there is a token, instead of failing with an
            # AttributeError on a missing cookie.
            token = self._get_cookies(url).get('dplayit_token')
            if token is not None:
                headers['Authorization'] = 'Bearer %s' % token.value

            try:
                info = self._download_json(
                    info_url, display_id, headers=headers)
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403):
                    # The error response body is JSON describing the failure.
                    info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
                    error = info['errors'][0]
                    if error.get('code') == 'access.denied.geoblocked':
                        self.raise_geo_restricted(
                            msg=error.get('detail'), countries=self._GEO_COUNTRIES)
                    raise ExtractorError(error['detail'], expected=True)
                raise

        hls_url = info['data']['attributes']['streaming']['hls']['url']

        formats = self._extract_m3u8_formats(
            hls_url, display_id, ext='mp4', entry_protocol='m3u8_native',
            m3u8_id='hls')
        # Sort formats as DPlayIE does, so that format selection does not
        # depend on the order of entries in the manifest.
        self._sort_formats(formats)

        series = self._html_search_regex(
            r'(?s)<h1[^>]+class=["\'].*?\bshow_title\b.*?["\'][^>]*>(.+?)</h1>',
            webpage, 'series', fatal=False)
        episode = self._search_regex(
            r'<p[^>]+class=["\'].*?\bdesc_ep\b.*?["\'][^>]*>\s*<br/>\s*<b>([^<]+)',
            webpage, 'episode', fatal=False)

        # Season/episode numbers and the upload date share one
        # "S.<n> E.<n> - dd/mm/yyyy" span; all three are optional.
        mobj = re.search(
            r'(?s)<span[^>]+class=["\']dates["\'][^>]*>.+?\bS\.(?P<season_number>\d+)\s+E\.(?P<episode_number>\d+)\s*-\s*(?P<upload_date>\d{2}/\d{2}/\d{4})',
            webpage)
        if mobj:
            season_number = int(mobj.group('season_number'))
            episode_number = int(mobj.group('episode_number'))
            upload_date = unified_strdate(mobj.group('upload_date'))
        else:
            season_number = episode_number = upload_date = None

        return {
            # Fall back to the display id when no numeric id was found.
            'id': compat_str(video_id or display_id),
            'display_id': display_id,
            'title': title,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'series': series,
            'season_number': season_number,
            'episode': episode,
            'episode_number': episode_number,
            'upload_date': upload_date,
            'formats': formats,
        }