]>
Commit | Line | Data |
---|---|---|
4ffbf778 S |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
416c3ca7 RA |
4 | import re |
5 | ||
4ffbf778 | 6 | from .common import InfoExtractor |
c9fd5306 | 7 | from ..compat import ( |
1c35b3da | 8 | compat_etree_fromstring, |
c9fd5306 S |
9 | compat_parse_qs, |
10 | compat_urllib_parse_unquote, | |
11 | compat_urllib_parse_urlparse, | |
12 | ) | |
4ffbf778 | 13 | from ..utils import ( |
1806a754 | 14 | ExtractorError, |
d984a98d | 15 | float_or_none, |
4ffbf778 S |
16 | unified_strdate, |
17 | int_or_none, | |
18 | qualities, | |
372744c5 | 19 | unescapeHTML, |
a3474aa5 | 20 | urlencode_postdata, |
4ffbf778 S |
21 | ) |
22 | ||
23 | ||
class OdnoklassnikiIE(InfoExtractor):
    """Extractor for videos hosted on ok.ru / odnoklassniki.ru.

    Extraction is attempted against the desktop page first; if that fails
    with an ExtractorError the mobile page is tried as a fallback.
    """

    _VALID_URL = r'''(?x)
                https?://
                    (?:(?:www|m|mobile)\.)?
                    (?:odnoklassniki|ok)\.ru/
                    (?:
                        video(?:embed)?/|
                        web-api/video/moviePlayer/|
                        live/|
                        dk\?.*?st\.mvId=
                    )
                    (?P<id>[\d-]+)
                '''
    _TESTS = [{
        # metadata in JSON
        'url': 'http://ok.ru/video/20079905452',
        'md5': '0b62089b479e06681abaaca9d204f152',
        'info_dict': {
            'id': '20079905452',
            'ext': 'mp4',
            'title': 'Культура меняет нас (прекрасный ролик!))',
            'duration': 100,
            'upload_date': '20141207',
            'uploader_id': '330537914540',
            'uploader': 'Виталий Добровольский',
            'like_count': int,
            'age_limit': 0,
        },
    }, {
        # metadataUrl
        'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
        'md5': '6ff470ea2dd51d5d18c295a355b0b6bc',
        'info_dict': {
            'id': '63567059965189-0',
            'ext': 'mp4',
            'title': 'Девушка без комплексов ...',
            'duration': 191,
            'upload_date': '20150518',
            'uploader_id': '534380003155',
            'uploader': '☭ Андрей Мещанинов ☭',
            'like_count': int,
            'age_limit': 0,
            'start_time': 5,
        },
    }, {
        # YouTube embed (metadataUrl, provider == USER_YOUTUBE)
        'url': 'http://ok.ru/video/64211978996595-1',
        'md5': '2f206894ffb5dbfcce2c5a14b909eea5',
        'info_dict': {
            'id': 'V_VztHT5BzY',
            'ext': 'mp4',
            'title': 'Космическая среда от 26 августа 2015',
            'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0',
            'duration': 440,
            'upload_date': '20150826',
            'uploader_id': 'tvroscosmos',
            'uploader': 'Телестудия Роскосмоса',
            'age_limit': 0,
        },
    }, {
        # YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
        'url': 'http://ok.ru/video/62036049272859-0',
        'info_dict': {
            'id': '62036049272859-0',
            'ext': 'mp4',
            'title': 'МУЗЫКА ДОЖДЯ .',
            'description': 'md5:6f1867132bd96e33bf53eda1091e8ed0',
            'upload_date': '20120106',
            'uploader_id': '473534735899',
            'uploader': 'МARINA D',
            'age_limit': 0,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Video has not been found',
    }, {
        'note': 'Only available in mobile webpage',
        'url': 'https://m.ok.ru/video/2361249957145',
        'info_dict': {
            'id': '2361249957145',
            'title': 'Быковское крещение',
            'duration': 3038.181,
        },
    }, {
        'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
        'only_matching': True,
    }, {
        'url': 'http://www.ok.ru/video/20648036891',
        'only_matching': True,
    }, {
        'url': 'http://www.ok.ru/videoembed/20648036891',
        'only_matching': True,
    }, {
        'url': 'http://m.ok.ru/video/20079905452',
        'only_matching': True,
    }, {
        'url': 'http://mobile.ok.ru/video/20079905452',
        'only_matching': True,
    }, {
        'url': 'https://www.ok.ru/live/484531969818',
        'only_matching': True,
    }, {
        'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#',
        'only_matching': True,
    }, {
        # Paid video
        'url': 'https://ok.ru/video/954886983203',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_url(webpage):
        """Return the URL of the first ok.ru ``videoembed`` iframe in *webpage*.

        Returns None when no such iframe is present.
        """
        mobj = re.search(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
        if mobj:
            return mobj.group('url')

    def _real_extract(self, url):
        """Extract info for *url*, trying the desktop page and then the mobile one.

        Raises the desktop ExtractorError when both attempts fail.
        """
        try:
            return self._extract_desktop(url)
        except ExtractorError as desktop_error:
            try:
                return self._extract_mobile(url)
            except ExtractorError:
                # error message of desktop webpage is in English
                raise desktop_error

    def _extract_desktop(self, url):
        """Extract the info dict from the desktop video page.

        Returns a ``url_transparent`` result pointing at ``movie['contentId']``
        when the metadata provider is ``USER_YOUTUBE``; otherwise returns an
        info dict with a ``formats`` list.

        Raises ExtractorError when the page shows an error stub or when no
        title is available for a video that is handled by this extractor.
        """
        start_time = int_or_none(compat_parse_qs(
            compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])

        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'http://ok.ru/video/%s' % video_id, video_id,
            note='Downloading desktop webpage')

        error = self._search_regex(
            r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
            webpage, 'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)

        player = self._parse_json(
            unescapeHTML(self._search_regex(
                r'data-options=(?P<quote>["\'])(?P<player>{.+?%s.+?})(?P=quote)' % video_id,
                webpage, 'player', group='player')),
            video_id)

        flashvars = player['flashvars']

        # Metadata is either embedded in flashvars as a JSON string or has to
        # be fetched from flashvars['metadataUrl'] with a POST request.
        metadata = flashvars.get('metadata')
        if metadata:
            metadata = self._parse_json(metadata, video_id)
        else:
            data = {}
            st_location = flashvars.get('location')
            if st_location:
                data['st.location'] = st_location
            metadata = self._download_json(
                compat_urllib_parse_unquote(flashvars['metadataUrl']),
                video_id, 'Downloading metadata JSON',
                data=urlencode_postdata(data))

        movie = metadata['movie']

        # Some embedded videos may not contain title in movie dict (e.g.
        # http://ok.ru/video/62036049272859-0) thus we allow missing title
        # here and it's going to be extracted later by an extractor that
        # will process the actual embed.
        provider = metadata.get('provider')
        title = movie['title'] if provider == 'UPLOADED_ODKL' else movie.get('title')

        thumbnail = movie.get('poster')
        duration = int_or_none(movie.get('duration'))

        # ``or {}`` also covers an explicit JSON null, which a plain
        # ``.get('author', {})`` would pass through as None.
        author = metadata.get('author') or {}
        uploader_id = author.get('id')
        uploader = author.get('name')

        upload_date = unified_strdate(self._html_search_meta(
            'ya:ovs:upload_date', webpage, 'upload date', default=None))

        age_limit = None
        adult = self._html_search_meta(
            'ya:ovs:adult', webpage, 'age limit', default=None)
        if adult:
            age_limit = 18 if adult == 'true' else 0

        like_count = int_or_none(metadata.get('likeCount'))

        info = {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'upload_date': upload_date,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'like_count': like_count,
            'age_limit': age_limit,
            'start_time': start_time,
        }

        if provider == 'USER_YOUTUBE':
            info.update({
                '_type': 'url_transparent',
                'url': movie['contentId'],
            })
            return info

        # From here on the title is mandatory. Raise a proper extractor error
        # instead of relying on ``assert``, which is stripped under ``-O``.
        if not title:
            raise ExtractorError('Unable to extract video title')

        quality = qualities(('4', '0', '1', '2', '3', '5'))

        # Tolerate a missing/null 'videos' list so that the HLS, live and
        # paid-video handling below still gets a chance to run.
        formats = [{
            'url': f['url'],
            'ext': 'mp4',
            'format_id': f['name'],
        } for f in metadata.get('videos') or []]

        m3u8_url = metadata.get('hlsManifestUrl')
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))

        dash_manifest = metadata.get('metadataEmbedded')
        if dash_manifest:
            formats.extend(self._parse_mpd_formats(
                compat_etree_fromstring(dash_manifest), 'mpd'))

        # Rank the formats collected so far by the ``type`` digit found in
        # their URL; live formats appended below are not ranked this way.
        for fmt in formats:
            fmt_type = self._search_regex(
                r'\btype[/=](\d)', fmt['url'],
                'format type', default=None)
            if fmt_type:
                fmt['quality'] = quality(fmt_type)

        # Live formats
        m3u8_url = metadata.get('hlsMasterPlaylistUrl')
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
        rtmp_url = metadata.get('rtmpUrl')
        if rtmp_url:
            formats.append({
                'url': rtmp_url,
                'format_id': 'rtmp',
                'ext': 'flv',
            })

        if not formats:
            payment_info = metadata.get('paymentInfo')
            if payment_info:
                self.raise_no_formats('This video is paid, subscribe to download it', expected=True)

        self._sort_formats(formats)

        info['formats'] = formats
        return info

    def _extract_mobile(self, url):
        """Extract the info dict from the mobile video page.

        Raises ExtractorError when the page shows an error message or when
        the page data carries no video source URL.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'http://m.ok.ru/video/%s' % video_id, video_id,
            note='Downloading mobile webpage')

        error = self._search_regex(
            r'видео</a>\s*<div\s+class="empty">(.+?)</div>',
            webpage, 'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)

        json_data = self._search_regex(
            r'data-video="(.+?)"', webpage, 'json data')
        json_data = self._parse_json(unescapeHTML(json_data), video_id) or {}

        # Without a source URL the single format below would be unusable;
        # fail here so the caller can report the desktop error instead.
        video_url = json_data.get('videoSrc')
        if not video_url:
            raise ExtractorError('Unable to extract video URL')

        return {
            'id': video_id,
            'title': json_data.get('videoName'),
            'duration': float_or_none(json_data.get('videoDuration'), scale=1000),
            'thumbnail': json_data.get('videoPosterSrc'),
            'formats': [{
                'format_id': 'mobile',
                'url': video_url,
                'ext': 'mp4',
            }]
        }