]>
Commit | Line | Data |
---|---|---|
bfd973ec | 1 | import functools |
0bf79ac4 S |
2 | import re |
3 | ||
4 | from .common import InfoExtractor | |
3d2623a8 | 5 | from ..networking.exceptions import HTTPError |
0bf79ac4 S |
6 | from ..utils import ( |
7 | ExtractorError, | |
8 | int_or_none, | |
bfd973ec | 9 | smuggle_url, |
665e9452 | 10 | unsmuggle_url, |
3052a30d | 11 | url_or_none, |
0bf79ac4 S |
12 | ) |
13 | ||
14 | ||
class EaglePlatformIE(InfoExtractor):
    """Extractor for videos served through an EaglePlatform player host.

    Accepts either the internal ``eagleplatform:<host>:<id>`` form or a
    ``*.media.eagleplatform.com/index/player?...record_id=<id>`` player URL.
    """

    _VALID_URL = r'''(?x)
                    (?:
                        eagleplatform:(?P<custom_host>[^/]+):|
                        https?://(?P<host>.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id=
                    )
                    (?P<id>\d+)
                '''
    _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1']
    _TESTS = [{
        # http://lenta.ru/news/2015/03/06/navalny/
        'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
        # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
        'info_dict': {
            'id': '227304',
            'ext': 'mp4',
            'title': 'Навальный вышел на свободу',
            'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 87,
            'view_count': int,
            'age_limit': 0,
        },
    }, {
        # http://muz-tv.ru/play/7129/
        # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
        'url': 'eagleplatform:media.clipyou.ru:12820',
        'md5': '358597369cf8ba56675c1df15e7af624',
        'info_dict': {
            'id': '12820',
            'ext': 'mp4',
            'title': "'O Sole Mio",
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 216,
            'view_count': int,
        },
        'skip': 'Georestricted',
    }, {
        # referrer protected video (https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/)
        'url': 'eagleplatform:tvrainru.media.eagleplatform.com:582306',
        'only_matching': True,
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Find EaglePlatform embeds in ``webpage``.

        Every returned URL is smuggled with ``{'referrer': url}`` so that
        ``_real_extract`` can forward the embedding page as the referrer.
        Returns an iterable of URLs; it is empty when nothing is found.
        """
        add_referer = functools.partial(smuggle_url, data={'referrer': url})

        # <iframe> embeds matched by _EMBED_REGEX take precedence.
        res = tuple(super()._extract_embed_urls(url, webpage))
        if res:
            return map(add_referer, res)

        # Matches the <script> tag that loads player.js and captures the host
        # it is served from; shared prefix of both patterns below.
        PLAYER_JS_RE = r'''
                        <script[^>]+
                            src=(?P<qjs>["\'])(?:https?:)?//(?P<host>(?:(?!(?P=qjs)).)+\.media\.eagleplatform\.com)/player/player\.js(?P=qjs)
                        .+?
                    '''
        embed_patterns = (
            # "Basic usage" embedding (see http://dultonmedia.github.io/eplayer/)
            r'''(?xs)
                    %s
                    <div[^>]+
                        class=(?P<qclass>["\'])eagleplayer(?P=qclass)[^>]+
                        data-id=["\'](?P<id>\d+)
            ''',
            # Generalization of "Javascript code usage", "Combined usage" and
            # "Usage without attaching to DOM" embeddings (see
            # http://dultonmedia.github.io/eplayer/)
            r'''(?xs)
                    %s
                    <script>
                    .+?
                    new\s+EaglePlayer\(
                        (?:[^,]+\s*,\s*)?
                        {
                            .+?
                            \bid\s*:\s*["\']?(?P<id>\d+)
                            .+?
                        }
                    \s*\)
                    .+?
                    </script>
            ''',
        )
        # Patterns are tried in order; the first one that matches wins.
        for pattern in embed_patterns:
            mobj = re.search(pattern % PLAYER_JS_RE, webpage)
            if mobj is not None:
                return [add_referer('eagleplatform:%(host)s:%(id)s' % mobj.groupdict())]

        # Explicit empty result instead of an implicit None.
        return []

    @staticmethod
    def _handle_error(response):
        """Raise an expected ExtractorError if ``response`` reports a non-200 status.

        ``response`` is a parsed JSON object (dict). A missing ``status`` key
        is treated as 200. The message is the space-joined ``errors`` entry,
        with a generic fallback when that entry is absent or empty.
        """
        status = int_or_none(response.get('status', 200))
        if status != 200:
            errors = response.get('errors') or ['API returned status %s' % status]
            raise ExtractorError(' '.join(errors), expected=True)

    def _download_json(self, url_or_request, video_id, *args, **kwargs):
        """Download JSON like the base class, surfacing API error messages.

        When the request fails with an HTTP error, the error body is parsed
        (best effort) and, if it carries an API error, that error is raised
        instead. Otherwise the original exception is re-raised unchanged.
        """
        try:
            response = super()._download_json(
                url_or_request, video_id, *args, **kwargs)
        except ExtractorError as ee:
            if isinstance(ee.cause, HTTPError):
                # The error body is only a hint: an undecodable or non-JSON
                # body must not mask the original HTTP error.
                error_body = ee.cause.response.read().decode('utf-8', 'replace')
                error_response = self._parse_json(error_body, video_id, fatal=False)
                if isinstance(error_response, dict):
                    self._handle_error(error_response)
            raise
        return response

    def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'):
        """Return the first element of the ``data`` list in the JSON response."""
        return self._download_json(url_or_request, video_id, note)['data'][0]

    def _real_extract(self, url):
        """Build the info dict (metadata plus HLS and HTTP formats) for ``url``."""
        url, smuggled_data = unsmuggle_url(url, {})

        mobj = self._match_valid_url(url)
        host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')

        headers = {}
        query = {
            'id': video_id,
        }

        # The referrer, when smuggled in by _extract_embed_urls, is sent both
        # as an HTTP header and as a query parameter.
        referrer = smuggled_data.get('referrer')
        if referrer:
            headers['Referer'] = referrer
            query['referrer'] = referrer

        player_data = self._download_json(
            'http://%s/api/player_data' % host, video_id,
            headers=headers, query=query)

        media = player_data['data']['playlist']['viewports'][0]['medialist'][0]

        title = media['title']
        description = media.get('description')
        thumbnail = self._proto_relative_url(media.get('snapshot'), 'http:')
        duration = int_or_none(media.get('duration'))
        view_count = int_or_none(media.get('views'))

        # 'allow_all' maps to 0; any other non-empty restriction maps to 18.
        age_restriction = media.get('age_restriction')
        age_limit = None
        if age_restriction:
            age_limit = 0 if age_restriction == 'allow_all' else 18

        secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:')

        formats = []

        m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
        m3u8_formats = self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='hls', fatal=False)
        formats.extend(m3u8_formats)

        # Index HLS formats by height so HTTP formats of the same height can
        # inherit their metadata.
        m3u8_formats_dict = {}
        for f in m3u8_formats:
            if f.get('height') is not None:
                m3u8_formats_dict[f['height']] = f

        mp4_data = self._download_json(
            # Secure mp4 URL is constructed according to Player.prototype.mp4 from
            # http://lentaru.media.eagleplatform.com/player/player.js
            re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4s', secure_m3u8),
            video_id, 'Downloading mp4 JSON', fatal=False)
        # This download is best-effort (fatal=False), so tolerate a failed
        # request as well as a missing, null or non-dict 'data' entry.
        mp4_urls = mp4_data.get('data') if isinstance(mp4_data, dict) else None
        if isinstance(mp4_urls, dict):
            for format_id, format_url in mp4_urls.items():
                if not url_or_none(format_url):
                    continue
                height = int_or_none(format_id)
                hls_format = m3u8_formats_dict.get(height) if height is not None else None
                if hls_format:
                    f = hls_format.copy()
                    f.update({
                        'format_id': f['format_id'].replace('hls', 'http'),
                        'protocol': 'http',
                    })
                else:
                    f = {
                        'format_id': 'http-%s' % format_id,
                        'height': height,
                    }
                f['url'] = format_url
                formats.append(f)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'age_limit': age_limit,
            'formats': formats,
        }
bfd973ec | 205 | |
206 | ||
class ClipYouEmbedIE(InfoExtractor):
    """Embed detector for media.clipyou.ru player iframes.

    Defines no ``_real_extract`` of its own; matches are yielded as
    ``eagleplatform:<host>:<id>`` URLs.
    """

    # NOTE(review): presumably False disables direct URL matching so the class
    # is only used for embed detection - confirm against InfoExtractor.
    _VALID_URL = False

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield the first clipyou player iframe in ``webpage``, if any.

        The yielded URL is smuggled with ``{'referrer': url}``.
        """
        iframe = re.search(
            r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
        if iframe is None:
            return
        embed_url = 'eagleplatform:%(host)s:%(id)s' % iframe.groupdict()
        yield smuggle_url(embed_url, {'referrer': url})