]>
Commit | Line | Data |
---|---|---|
097b056c AC |
1 | from __future__ import unicode_literals |
2 | ||
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
7 | ExtractorError, | |
8 | js_to_json, | |
9 | url_or_none, | |
10 | urljoin, | |
11 | ) | |
12 | ||
13 | ||
# Stream names that MxplayerIE._get_stream_urls accepts; entries under any
# other name are skipped.
VALID_STREAMS = ('dash',)
15 | ||
16 | ||
class MxplayerIE(InfoExtractor):
    """Extractor for movie pages on mxplayer.in.

    The page embeds its state as a ``window.state`` JavaScript object; the
    video metadata and stream locations are read from that object.
    """

    _VALID_URL = r'https?://(?:www\.)?mxplayer\.in/movie/(?P<slug>[a-z0-9]+(?:-[a-z0-9]+)*)'
    _TEST = {
        'url': 'https://www.mxplayer.in/movie/watch-knock-knock-hindi-dubbed-movie-online-b9fa28df3bfb8758874735bbd7d2655a?watch=true',
        'info_dict': {
            'id': 'b9fa28df3bfb8758874735bbd7d2655a',
            'ext': 'mp4',
            'title': 'Knock Knock Movie | Watch 2015 Knock Knock Full Movie Online- MX Player',
            'description': 'md5:b195ba93ff1987309cfa58e2839d2a5b'
        },
        'params': {
            'skip_download': True,
            'format': 'bestvideo'
        }
    }

    def _get_best_stream_url(self, stream):
        """Return the first non-empty value of ``stream``, or ``None``.

        ``stream`` is expected to be a dict whose values are candidate
        stream URLs; empty values are ignored.  Anything that is not a dict
        yields ``None`` instead of raising.
        """
        if not isinstance(stream, dict):
            return None
        return next((candidate for candidate in stream.values() if candidate), None)

    def _get_stream_urls(self, video_dict):
        """Collect ``(stream_name, stream_url)`` pairs for supported streams.

        ``video_dict['stream']['provider']`` names the key of the stream
        dict that holds the per-stream entries.  Only stream names listed in
        ``VALID_STREAMS`` are considered, and streams without any usable URL
        are skipped.

        Raises ExtractorError when no provider entry can be found.
        """
        stream_dict = video_dict.get('stream')
        if not isinstance(stream_dict, dict):
            stream_dict = {}

        stream_provider = stream_dict.get('provider')
        try:
            provider_streams = stream_dict.get(stream_provider)
        except TypeError:
            # Unhashable provider value (e.g. a dict or a list) cannot be
            # used as a key; treat it like a missing provider.
            provider_streams = None

        if not provider_streams or not isinstance(provider_streams, dict):
            message = 'No stream provider found'
            raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)

        streams = []
        for stream_name, stream_info in provider_streams.items():
            if stream_name not in VALID_STREAMS:
                continue
            stream_url = self._get_best_stream_url(stream_info)
            if stream_url is None:
                continue
            streams.append((stream_name, stream_url))
        return streams

    def _real_extract(self, url):
        """Extract the info dict for the movie page at ``url``.

        The video id is the last dash-separated component of the URL slug.
        The result contains ``id``, ``title``, ``description`` and
        ``formats``, plus ``thumbnails`` and ``display_id`` when the page
        state provides the data for them.

        Raises ExtractorError when the page state or the entry for the
        video cannot be found.
        """
        mobj = re.match(self._VALID_URL, url)
        video_slug = mobj.group('slug')

        video_id = video_slug.split('-')[-1]

        webpage = self._download_webpage(url, video_id)

        window_state_json = self._html_search_regex(
            r'(?s)<script>window\.state\s*[:=]\s(\{.+\})\n(\w+).*(</script>).*',
            webpage, 'WindowState')

        source = self._parse_json(js_to_json(window_state_json), video_id)
        if not source or not isinstance(source, dict):
            raise ExtractorError('Cannot find source', expected=True)

        # Missing sections are reported explicitly (or tolerated) instead of
        # surfacing as a bare KeyError.
        config_dict = source.get('config') or {}
        video_dict = (source.get('entities') or {}).get(video_id)
        if not video_dict:
            raise ExtractorError('Cannot find video metadata for %s' % video_id)
        stream_urls = self._get_stream_urls(video_dict)

        title = self._og_search_title(webpage, fatal=True, default=video_dict.get('title'))

        formats = []
        headers = {'Referer': url}
        video_base_url = config_dict.get('videoCdnBaseUrl')
        for stream_name, stream_url in stream_urls:
            if stream_name == 'dash':
                format_url = url_or_none(urljoin(video_base_url, stream_url))
                if not format_url:
                    continue
                formats.extend(self._extract_mpd_formats(
                    format_url, video_id, mpd_id='dash', headers=headers))

        self._sort_formats(formats)
        info = {
            'id': video_id,
            'title': title,
            'description': video_dict.get('description'),
            'formats': formats
        }

        # Each thumbnail keeps the fields of its imageInfo entry, with 'url'
        # replaced by the URL joined onto the image base URL.  Entries for
        # which no URL could be built are dropped.
        image_base_url = config_dict.get('imageBaseUrl')
        thumbnails = []
        for image in video_dict.get('imageInfo') or []:
            if not isinstance(image, dict):
                continue
            image_url = urljoin(image_base_url, image.get('url'))
            if not image_url:
                continue
            thumbnail = dict(image)
            thumbnail['url'] = image_url
            thumbnails.append(thumbnail)
        if thumbnails:
            info['thumbnails'] = thumbnails

        if video_dict.get('webUrl'):
            # display_id is the last path component of webUrl with the
            # video id (and the dash preceding it) removed.
            last_part = video_dict['webUrl'].split("/")[-1]
            info['display_id'] = last_part.replace(video_id, "").rstrip("-")

        return info