]>
Commit | Line | Data |
---|---|---|
0bf79ac4 S |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
ae655671 | 7 | from ..compat import compat_HTTPError |
0bf79ac4 S |
8 | from ..utils import ( |
9 | ExtractorError, | |
10 | int_or_none, | |
237a4110 | 11 | url_basename, |
0bf79ac4 S |
12 | ) |
13 | ||
14 | ||
class EaglePlatformIE(InfoExtractor):
    """Extractor for videos played through the EaglePlatform player.

    Accepts either a ``*.media.eagleplatform.com/index/player`` URL carrying
    a ``record_id`` query parameter, or the internal pseudo-URL
    ``eagleplatform:<host>:<id>`` that other extractors can produce via
    ``_extract_url`` for players served from a custom host.
    """

    _VALID_URL = r'''(?x)
                    (?:
                        eagleplatform:(?P<custom_host>[^/]+):|
                        https?://(?P<host>.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id=
                    )
                    (?P<id>\d+)
                '''
    _TESTS = [{
        # http://lenta.ru/news/2015/03/06/navalny/
        'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
        # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
        'info_dict': {
            'id': '227304',
            'ext': 'mp4',
            'title': 'Навальный вышел на свободу',
            'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
            # Raw string: '\.' is not a valid escape in a plain literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 87,
            'view_count': int,
            'age_limit': 0,
        },
    }, {
        # http://muz-tv.ru/play/7129/
        # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
        'url': 'eagleplatform:media.clipyou.ru:12820',
        'md5': '358597369cf8ba56675c1df15e7af624',
        'info_dict': {
            'id': '12820',
            'ext': 'mp4',
            'title': "'O Sole Mio",
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 216,
            'view_count': int,
        },
        'skip': 'Georestricted',
    }]

    @staticmethod
    def _extract_url(webpage):
        """Locate an EaglePlatform embed inside a page's HTML.

        Returns the player URL taken from an ``<iframe src=...>`` when one is
        present; otherwise an ``eagleplatform:<host>:<id>`` pseudo-URL built
        from a ``player.js`` script tag followed by an ``eagleplayer`` div;
        otherwise ``None``.
        """
        # Regular iframe embedding
        mobj = re.search(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1',
            webpage)
        if mobj is not None:
            return mobj.group('url')
        # Basic usage embedding (see http://dultonmedia.github.io/eplayer/)
        mobj = re.search(
            r'''(?xs)
                    <script[^>]+
                        src=(?P<q1>["\'])(?:https?:)?//(?P<host>.+?\.media\.eagleplatform\.com)/player/player\.js(?P=q1)
                    .+?
                    <div[^>]+
                        class=(?P<q2>["\'])eagleplayer(?P=q2)[^>]+
                        data-id=["\'](?P<id>\d+)
            ''', webpage)
        if mobj is not None:
            return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict()
        return None

    @staticmethod
    def _handle_error(response):
        """Raise an expected ExtractorError if the API response is not OK.

        A response without a ``status`` key is treated as status 200. For
        any other status the entries of ``response['errors']`` are joined
        with spaces and used as the error message.
        """
        status = int_or_none(response.get('status', 200))
        if status != 200:
            raise ExtractorError(' '.join(response['errors']), expected=True)

    def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', *args, **kwargs):
        """Download JSON, surfacing API error messages on HTTP failures.

        Delegates to the parent implementation. When that raises an
        ExtractorError caused by an HTTP error, the error body is parsed as
        JSON and passed to ``_handle_error`` so the service's own message is
        raised instead; if ``_handle_error`` does not raise, the original
        exception propagates.

        Extra positional/keyword arguments are forwarded unchanged so this
        override does not narrow the parent's interface (NOTE(review): the
        parent signature is not visible in this file - confirm it accepts
        them).
        """
        try:
            return super(EaglePlatformIE, self)._download_json(
                url_or_request, video_id, note, *args, **kwargs)
        except ExtractorError as ee:
            if isinstance(ee.cause, compat_HTTPError):
                response = self._parse_json(ee.cause.read().decode('utf-8'), video_id)
                self._handle_error(response)
            raise

    def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'):
        """Return the first entry of the ``data`` list in the JSON response."""
        return self._download_json(url_or_request, video_id, note)['data'][0]

    def _real_extract(self, url):
        """Build the info dict (metadata plus HLS and HTTP formats) for ``url``."""
        mobj = re.match(self._VALID_URL, url)
        # Exactly one of the two host groups matches, depending on URL flavour.
        host = mobj.group('custom_host') or mobj.group('host')
        video_id = mobj.group('id')

        player_data = self._download_json(
            'http://%s/api/player_data?id=%s' % (host, video_id), video_id)

        media = player_data['data']['playlist']['viewports'][0]['medialist'][0]

        title = media['title']
        description = media.get('description')
        thumbnail = self._proto_relative_url(media.get('snapshot'), 'http:')
        duration = int_or_none(media.get('duration'))
        view_count = int_or_none(media.get('views'))

        # No restriction info -> unknown (None); 'allow_all' -> 0; anything
        # else is treated as adults-only.
        age_restriction = media.get('age_restriction')
        age_limit = None
        if age_restriction:
            age_limit = 0 if age_restriction == 'allow_all' else 18

        secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:')

        formats = []

        # The "secure" URLs return JSON whose first data entry is the real
        # media URL.
        m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
        m3u8_formats = self._extract_m3u8_formats(
            m3u8_url, video_id,
            'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
        formats.extend(m3u8_formats)

        mp4_url = self._get_video_url(
            # Secure mp4 URL is constructed according to Player.prototype.mp4 from
            # http://lentaru.media.eagleplatform.com/player/player.js
            re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4', secure_m3u8),
            video_id, 'Downloading mp4 JSON')
        mp4_url_basename = url_basename(mp4_url)
        # Derive a direct HTTP format for each HLS variant by swapping the mp4
        # URL's basename for the variant's directory name.
        for m3u8_format in m3u8_formats:
            variant = re.search(r'/([^/]+)/index\.m3u8', m3u8_format['url'])
            if not variant:
                continue
            video_url = mp4_url.replace(mp4_url_basename, variant.group(1))
            # The direct HTTP link sometimes results in 404; keep only the
            # ones that validate.
            if not self._is_valid_url(video_url, video_id):
                continue
            http_format = m3u8_format.copy()
            http_format.update({
                'url': video_url,
                'format_id': m3u8_format['format_id'].replace('hls', 'http'),
                'protocol': 'http',
            })
            formats.append(http_format)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'age_limit': age_limit,
            'formats': formats,
        }