]>
Commit | Line | Data |
---|---|---|
4ffbf778 | 1 | from .common import InfoExtractor |
c9fd5306 | 2 | from ..compat import ( |
1c35b3da | 3 | compat_etree_fromstring, |
c9fd5306 S |
4 | compat_parse_qs, |
5 | compat_urllib_parse_unquote, | |
6 | compat_urllib_parse_urlparse, | |
7 | ) | |
4ffbf778 | 8 | from ..utils import ( |
1806a754 | 9 | ExtractorError, |
d984a98d | 10 | float_or_none, |
4ffbf778 S |
11 | int_or_none, |
12 | qualities, | |
8196182a | 13 | smuggle_url, |
372744c5 | 14 | unescapeHTML, |
8196182a | 15 | unified_strdate, |
16 | unsmuggle_url, | |
a3474aa5 | 17 | urlencode_postdata, |
4ffbf778 S |
18 | ) |
19 | ||
20 | ||
class OdnoklassnikiIE(InfoExtractor):
    # Extractor for videos hosted on ok.ru / odnoklassniki.ru.
    #
    # Extraction is attempted on the desktop page first and falls back to the
    # mobile page (see _real_extract). Embeds found on third-party pages are
    # passed around as smuggled URLs carrying the embedding page as 'referrer'.
    _VALID_URL = r'''(?x)
                https?://
                    (?:(?:www|m|mobile)\.)?
                    (?:odnoklassniki|ok)\.ru/
                    (?:
                        video(?P<embed>embed)?/|
                        web-api/video/moviePlayer/|
                        live/|
                        dk\?.*?st\.mvId=
                    )
                    (?P<id>[\d-]+)
                '''
    _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1']
    _TESTS = [{
        'note': 'Coub embedded',
        'url': 'http://ok.ru/video/1484130554189',
        'info_dict': {
            'id': '1keok9',
            'ext': 'mp4',
            'timestamp': 1545580896,
            'view_count': int,
            'thumbnail': 'https://coub-attachments.akamaized.net/coub_storage/coub/simple/cw_image/c5ac87553bd/608e806a1239c210ab692/1545580913_00026.jpg',
            'title': 'Народная забава',
            'uploader': 'Nevata',
            'upload_date': '20181223',
            'age_limit': 0,
            'uploader_id': 'nevata.s',
            'like_count': int,
            'duration': 8.08,
            'repost_count': int,
        },
    }, {
        'note': 'vk.com embedded',
        'url': 'https://ok.ru/video/3568183087575',
        'info_dict': {
            'id': '-165101755_456243749',
            'ext': 'mp4',
            'uploader_id': '-165101755',
            'duration': 132,
            'timestamp': 1642869935,
            'upload_date': '20220122',
            'thumbnail': str,
            'title': str,
            'uploader': str,
        },
    }, {
        # metadata in JSON
        'url': 'http://ok.ru/video/20079905452',
        'md5': '5d2b64756e2af296e3b383a0bc02a6aa',
        'info_dict': {
            'id': '20079905452',
            'ext': 'mp4',
            'title': 'Культура меняет нас (прекрасный ролик!))',
            'thumbnail': str,
            'duration': 100,
            'upload_date': '20141207',
            'uploader_id': '330537914540',
            'uploader': 'Виталий Добровольский',
            'like_count': int,
            'age_limit': 0,
        },
    }, {
        # metadataUrl
        'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
        'md5': 'f8c951122516af72e6e6ffdd3c41103b',
        'info_dict': {
            'id': '63567059965189-0',
            'ext': 'mp4',
            'title': 'Девушка без комплексов ...',
            'thumbnail': str,
            'duration': 191,
            'upload_date': '20150518',
            'uploader_id': '534380003155',
            'uploader': '☭ Андрей Мещанинов ☭',
            'like_count': int,
            'age_limit': 0,
            'start_time': 5,
        },
    }, {
        # YouTube embed (metadataUrl, provider == USER_YOUTUBE)
        'url': 'https://ok.ru/video/3952212382174',
        'md5': '91749d0bd20763a28d083fa335bbd37a',
        'info_dict': {
            'id': '5axVgHHDBvU',
            'ext': 'mp4',
            'title': 'Youtube-dl 101: What is it and HOW to use it! Full Download Walkthrough and Guide',
            'description': 'md5:b57209eeb9d5c2f20c984dfb58862097',
            'uploader': 'Lod Mer',
            'uploader_id': '575186401502',
            'duration': 1529,
            'age_limit': 0,
            'upload_date': '20210405',
            'comment_count': int,
            'live_status': 'not_live',
            'view_count': int,
            'thumbnail': 'https://i.mycdn.me/i?r=AEHujHvw2RjEbemUCNEorZbxYpb_p_9AcN2FmGik64Krkcmz37YtlY093oAM5-HIEAt7Zi9s0CiBOSDmbngC-I-k&fn=external_8',
            'uploader_url': 'http://www.youtube.com/user/MrKewlkid94',
            'channel_follower_count': int,
            'tags': ['youtube-dl', 'youtube playlists', 'download videos', 'download audio'],
            'channel_id': 'UCVGtvURtEURYHtJFUegdSug',
            'like_count': int,
            'availability': 'public',
            'channel_url': 'https://www.youtube.com/channel/UCVGtvURtEURYHtJFUegdSug',
            'categories': ['Education'],
            'playable_in_embed': True,
            'channel': 'BornToReact',
        },
    }, {
        # YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
        'url': 'http://ok.ru/video/62036049272859-0',
        'info_dict': {
            'id': '62036049272859-0',
            'ext': 'mp4',
            'title': 'МУЗЫКА ДОЖДЯ .',
            'description': 'md5:6f1867132bd96e33bf53eda1091e8ed0',
            'upload_date': '20120106',
            'uploader_id': '473534735899',
            'uploader': 'МARINA D',
            'age_limit': 0,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Video has not been found',
    }, {
        # TODO: HTTP Error 400: Bad Request, it only works if there's no cookies when downloading
        'note': 'Only available in mobile webpage',
        'url': 'https://m.ok.ru/video/2361249957145',
        'info_dict': {
            'id': '2361249957145',
            'ext': 'mp4',
            'title': 'Быковское крещение',
            'duration': 3038.181,
        },
    }, {
        'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
        'only_matching': True,
    }, {
        'url': 'http://www.ok.ru/video/20648036891',
        'only_matching': True,
    }, {
        'url': 'http://www.ok.ru/videoembed/20648036891',
        'only_matching': True,
    }, {
        'url': 'http://m.ok.ru/video/20079905452',
        'only_matching': True,
    }, {
        'url': 'http://mobile.ok.ru/video/20079905452',
        'only_matching': True,
    }, {
        'url': 'https://www.ok.ru/live/484531969818',
        'only_matching': True,
    }, {
        'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#',
        'only_matching': True,
    }, {
        # Paid video
        'url': 'https://ok.ru/video/954886983203',
        'only_matching': True,
    }, {
        'url': 'https://ok.ru/videoembed/2932705602075',
        'info_dict': {
            'id': '2932705602075',
            'ext': 'mp4',
            'thumbnail': 'https://i.mycdn.me/videoPreview?id=1369902483995&type=37&idx=2&tkn=fqlnoQD_xwq5ovIlKfgNyU08qmM&fn=external_8',
            'title': 'Boosty для тебя!',
            'uploader_id': '597811038747',
            'like_count': 0,
            'duration': 35,
        },
    }]

    _WEBPAGE_TESTS = [{
        'url': 'https://boosty.to/ikakprosto/posts/56cedaca-b56a-4dfd-b3ed-98c79cfa0167',
        'info_dict': {
            'id': '3950343629563',
            'ext': 'mp4',
            'thumbnail': 'https://i.mycdn.me/videoPreview?id=2776238394107&type=37&idx=11&tkn=F3ejkUFcpuI4DnMRxrDGcH5YcmM&fn=external_8',
            'title': 'Заяц Бусти.mp4',
            'uploader_id': '571368965883',
            'like_count': 0,
            'duration': 10444,
        },
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield each embed URL found by the base class, smuggled with the
        embedding page URL as 'referrer' (read back in _extract_desktop)."""
        for x in super()._extract_embed_urls(url, webpage):
            yield smuggle_url(x, {'referrer': url})

    def _real_extract(self, url):
        """Try the desktop page first and fall back to the mobile page.

        If both attempts raise ExtractorError, the desktop error is the one
        that propagates.
        """
        try:
            return self._extract_desktop(url)
        except ExtractorError as e:
            try:
                return self._extract_mobile(url)
            except ExtractorError:
                # error message of desktop webpage is in English
                raise e

    def _extract_desktop(self, url):
        """Extract video info from the desktop (ok.ru) page.

        Returns either a url/url_transparent result delegating to another
        extractor (external player, OPEN_GRAPH or USER_YOUTUBE providers) or
        an info dict with 'formats'.

        Raises ExtractorError (expected) with the site's own message when the
        page shows a video stub error.
        """
        # 'fromTime' is read from the incoming URL before it is unsmuggled
        start_time = int_or_none(compat_parse_qs(
            compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])

        url, smuggled = unsmuggle_url(url, {})
        referrer = smuggled.get('referrer')
        video_id, is_embed = self._match_valid_url(url).group('id', 'embed')
        mode = 'videoembed' if is_embed else 'video'

        webpage = self._download_webpage(
            f'https://ok.ru/{mode}/{video_id}', video_id,
            note='Downloading desktop webpage',
            headers={'Referer': referrer} if referrer else {})

        error = self._search_regex(
            r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
            webpage, 'error', default=None)
        # Direct link from boosty: retry once with a boosty referrer. The
        # retry carries a referrer, so this branch cannot recurse again.
        if (error == 'The author of this video has not been found or is blocked'
                and not referrer and mode == 'videoembed'):
            return self._extract_desktop(smuggle_url(url, {'referrer': 'https://boosty.to'}))
        elif error:
            raise ExtractorError(error, expected=True)

        player = self._parse_json(
            unescapeHTML(self._search_regex(
                r'data-options=(?P<quote>["\'])(?P<player>{.+?%s.+?})(?P=quote)' % video_id,
                webpage, 'player', group='player')),
            video_id)

        # embedded external player
        if player.get('isExternalPlayer') and player.get('url'):
            return self.url_result(player['url'])

        flashvars = player['flashvars']

        # Metadata is either inlined as a JSON string or must be fetched
        # from 'metadataUrl' with a POST request.
        metadata = flashvars.get('metadata')
        if metadata:
            metadata = self._parse_json(metadata, video_id)
        else:
            data = {}
            st_location = flashvars.get('location')
            if st_location:
                data['st.location'] = st_location
            metadata = self._download_json(
                compat_urllib_parse_unquote(flashvars['metadataUrl']),
                video_id, 'Downloading metadata JSON',
                data=urlencode_postdata(data))

        movie = metadata['movie']

        # Some embedded videos may not contain title in movie dict (e.g.
        # http://ok.ru/video/62036049272859-0) thus we allow missing title
        # here and it's going to be extracted later by an extractor that
        # will process the actual embed.
        provider = metadata.get('provider')
        title = movie['title'] if provider == 'UPLOADED_ODKL' else movie.get('title')

        thumbnail = movie.get('poster')
        duration = int_or_none(movie.get('duration'))

        # 'author' may be present but null in the JSON; treat that as empty
        author = metadata.get('author') or {}
        uploader_id = author.get('id')
        uploader = author.get('name')

        upload_date = unified_strdate(self._html_search_meta(
            'ya:ovs:upload_date', webpage, 'upload date', default=None))

        age_limit = None
        adult = self._html_search_meta(
            'ya:ovs:adult', webpage, 'age limit', default=None)
        if adult:
            age_limit = 18 if adult == 'true' else 0

        like_count = int_or_none(metadata.get('likeCount'))

        info = {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'upload_date': upload_date,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'like_count': like_count,
            'age_limit': age_limit,
            'start_time': start_time,
        }

        # Externally hosted content (OPEN_GRAPH, e.g. pladform; USER_YOUTUBE):
        # hand over to the extractor matching movie['contentId'] while keeping
        # the metadata gathered above.
        if provider in ('OPEN_GRAPH', 'USER_YOUTUBE'):
            info.update({
                '_type': 'url_transparent',
                'url': movie['contentId'],
            })
            return info

        # From here on the video is served by ok.ru itself, so a title is required
        assert title

        # Format "type" digits; presumably ordered worst -> best as expected
        # by the `qualities` helper -- confirm against its definition.
        quality = qualities(('4', '0', '1', '2', '3', '5', '6', '7'))

        # A missing or null 'videos' list is treated as "no direct formats"
        # so that the paid-video check below can still run.
        formats = [{
            'url': f['url'],
            'ext': 'mp4',
            'format_id': f['name'],
        } for f in metadata.get('videos') or []]

        m3u8_url = metadata.get('hlsManifestUrl')
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))

        dash_manifest = metadata.get('metadataEmbedded')
        if dash_manifest:
            formats.extend(self._parse_mpd_formats(
                compat_etree_fromstring(dash_manifest), 'mpd'))

        # Rank the formats collected so far by the type digit in their URL
        for fmt in formats:
            fmt_type = self._search_regex(
                r'\btype[/=](\d)', fmt['url'],
                'format type', default=None)
            if fmt_type:
                fmt['quality'] = quality(fmt_type)

        # Live formats
        m3u8_url = metadata.get('hlsMasterPlaylistUrl')
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
        rtmp_url = metadata.get('rtmpUrl')
        if rtmp_url:
            formats.append({
                'url': rtmp_url,
                'format_id': 'rtmp',
                'ext': 'flv',
            })

        if not formats:
            payment_info = metadata.get('paymentInfo')
            if payment_info:
                self.raise_no_formats('This video is paid, subscribe to download it', expected=True)

        self._sort_formats(formats)

        info['formats'] = formats
        return info

    def _extract_mobile(self, url):
        """Extract video info from the mobile (m.ok.ru) page.

        Reads the JSON stored in the page's `data-video` attribute and
        returns an info dict with a single 'mobile' format.

        Raises ExtractorError when the page shows an error message or when
        the JSON carries no video source URL.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            f'http://m.ok.ru/video/{video_id}', video_id,
            note='Downloading mobile webpage')

        error = self._search_regex(
            r'видео</a>\s*<div\s+class="empty">(.+?)</div>',
            webpage, 'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)

        json_data = self._search_regex(
            r'data-video="(.+?)"', webpage, 'json data')
        json_data = self._parse_json(unescapeHTML(json_data), video_id) or {}

        video_src = json_data.get('videoSrc')
        if not video_src:
            # Without a source URL the single format below would be unusable;
            # fail here so _real_extract can report the desktop error instead.
            raise ExtractorError('Unable to extract mobile video URL')

        return {
            'id': video_id,
            'title': json_data.get('videoName'),
            'duration': float_or_none(json_data.get('videoDuration'), scale=1000),
            'thumbnail': json_data.get('videoPosterSrc'),
            'formats': [{
                'format_id': 'mobile',
                'url': video_src,
                'ext': 'mp4',
            }]
        }