]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/odnoklassniki.py
[extractor] Deprecate `_sort_formats`
[yt-dlp.git] / yt_dlp / extractor / odnoklassniki.py
CommitLineData
4ffbf778 1from .common import InfoExtractor
c9fd5306 2from ..compat import (
1c35b3da 3 compat_etree_fromstring,
c9fd5306
S
4 compat_parse_qs,
5 compat_urllib_parse_unquote,
6 compat_urllib_parse_urlparse,
7)
4ffbf778 8from ..utils import (
1806a754 9 ExtractorError,
d984a98d 10 float_or_none,
4ffbf778
S
11 int_or_none,
12 qualities,
8196182a 13 smuggle_url,
372744c5 14 unescapeHTML,
8196182a 15 unified_strdate,
16 unsmuggle_url,
a3474aa5 17 urlencode_postdata,
4ffbf778
S
18)
19
20
21class OdnoklassnikiIE(InfoExtractor):
d04ca976
S
22 _VALID_URL = r'''(?x)
23 https?://
24 (?:(?:www|m|mobile)\.)?
25 (?:odnoklassniki|ok)\.ru/
26 (?:
8196182a 27 video(?P<embed>embed)?/|
d04ca976
S
28 web-api/video/moviePlayer/|
29 live/|
30 dk\?.*?st\.mvId=
31 )
32 (?P<id>[\d-]+)
33 '''
bfd973ec 34 _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1']
4ffbf778 35 _TESTS = [{
b8b3f456
K
36 'note': 'Coub embedded',
37 'url': 'http://ok.ru/video/1484130554189',
38 'info_dict': {
39 'id': '1keok9',
40 'ext': 'mp4',
41 'timestamp': 1545580896,
42 'view_count': int,
8196182a 43 'thumbnail': 'https://coub-attachments.akamaized.net/coub_storage/coub/simple/cw_image/c5ac87553bd/608e806a1239c210ab692/1545580913_00026.jpg',
b8b3f456
K
44 'title': 'Народная забава',
45 'uploader': 'Nevata',
46 'upload_date': '20181223',
47 'age_limit': 0,
48 'uploader_id': 'nevata.s',
49 'like_count': int,
50 'duration': 8.08,
51 'repost_count': int,
52 },
53 }, {
54 'note': 'vk.com embedded',
55 'url': 'https://ok.ru/video/3568183087575',
56 'info_dict': {
57 'id': '-165101755_456243749',
58 'ext': 'mp4',
59 'uploader_id': '-165101755',
60 'duration': 132,
61 'timestamp': 1642869935,
62 'upload_date': '20220122',
63 'thumbnail': str,
64 'title': str,
65 'uploader': str,
66 },
67 }, {
c6bbdadd 68 # metadata in JSON
4ffbf778 69 'url': 'http://ok.ru/video/20079905452',
8196182a 70 'md5': '5d2b64756e2af296e3b383a0bc02a6aa',
4ffbf778
S
71 'info_dict': {
72 'id': '20079905452',
73 'ext': 'mp4',
74 'title': 'Культура меняет нас (прекрасный ролик!))',
8196182a 75 'thumbnail': str,
4ffbf778 76 'duration': 100,
887e9bc7 77 'upload_date': '20141207',
4ffbf778
S
78 'uploader_id': '330537914540',
79 'uploader': 'Виталий Добровольский',
80 'like_count': int,
9f2e7c2f 81 'age_limit': 0,
c6bbdadd
S
82 },
83 }, {
84 # metadataUrl
c9fd5306 85 'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
8196182a 86 'md5': 'f8c951122516af72e6e6ffdd3c41103b',
c6bbdadd
S
87 'info_dict': {
88 'id': '63567059965189-0',
89 'ext': 'mp4',
90 'title': 'Девушка без комплексов ...',
8196182a 91 'thumbnail': str,
c6bbdadd 92 'duration': 191,
887e9bc7 93 'upload_date': '20150518',
c6bbdadd 94 'uploader_id': '534380003155',
887e9bc7 95 'uploader': '☭ Андрей Мещанинов ☭',
c6bbdadd 96 'like_count': int,
9f2e7c2f 97 'age_limit': 0,
c9fd5306 98 'start_time': 5,
4ffbf778 99 },
88720ed0
S
100 }, {
101 # YouTube embed (metadataUrl, provider == USER_YOUTUBE)
8196182a 102 'url': 'https://ok.ru/video/3952212382174',
103 'md5': '91749d0bd20763a28d083fa335bbd37a',
88720ed0 104 'info_dict': {
8196182a 105 'id': '5axVgHHDBvU',
88720ed0 106 'ext': 'mp4',
8196182a 107 'title': 'Youtube-dl 101: What is it and HOW to use it! Full Download Walkthrough and Guide',
108 'description': 'md5:b57209eeb9d5c2f20c984dfb58862097',
109 'uploader': 'Lod Mer',
110 'uploader_id': '575186401502',
111 'duration': 1529,
88720ed0 112 'age_limit': 0,
8196182a 113 'upload_date': '20210405',
114 'comment_count': int,
115 'live_status': 'not_live',
116 'view_count': int,
117 'thumbnail': 'https://i.mycdn.me/i?r=AEHujHvw2RjEbemUCNEorZbxYpb_p_9AcN2FmGik64Krkcmz37YtlY093oAM5-HIEAt7Zi9s0CiBOSDmbngC-I-k&fn=external_8',
118 'uploader_url': 'http://www.youtube.com/user/MrKewlkid94',
119 'channel_follower_count': int,
120 'tags': ['youtube-dl', 'youtube playlists', 'download videos', 'download audio'],
121 'channel_id': 'UCVGtvURtEURYHtJFUegdSug',
122 'like_count': int,
123 'availability': 'public',
124 'channel_url': 'https://www.youtube.com/channel/UCVGtvURtEURYHtJFUegdSug',
125 'categories': ['Education'],
126 'playable_in_embed': True,
127 'channel': 'BornToReact',
88720ed0 128 },
749b0046
S
129 }, {
130 # YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
131 'url': 'http://ok.ru/video/62036049272859-0',
132 'info_dict': {
133 'id': '62036049272859-0',
134 'ext': 'mp4',
135 'title': 'МУЗЫКА ДОЖДЯ .',
136 'description': 'md5:6f1867132bd96e33bf53eda1091e8ed0',
137 'upload_date': '20120106',
138 'uploader_id': '473534735899',
139 'uploader': 'МARINA D',
140 'age_limit': 0,
141 },
142 'params': {
143 'skip_download': True,
144 },
58f6ab72 145 'skip': 'Video has not been found',
d984a98d 146 }, {
8196182a 147 # TODO: HTTP Error 400: Bad Request, it only works if there's no cookies when downloading
d984a98d
THD
148 'note': 'Only available in mobile webpage',
149 'url': 'https://m.ok.ru/video/2361249957145',
150 'info_dict': {
151 'id': '2361249957145',
8196182a 152 'ext': 'mp4',
d984a98d
THD
153 'title': 'Быковское крещение',
154 'duration': 3038.181,
155 },
4ffbf778
S
156 }, {
157 'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
158 'only_matching': True,
cdc8d0c3
YCH
159 }, {
160 'url': 'http://www.ok.ru/video/20648036891',
161 'only_matching': True,
d762f86e
S
162 }, {
163 'url': 'http://www.ok.ru/videoembed/20648036891',
164 'only_matching': True,
10e6ed93
S
165 }, {
166 'url': 'http://m.ok.ru/video/20079905452',
167 'only_matching': True,
168 }, {
169 'url': 'http://mobile.ok.ru/video/20079905452',
170 'only_matching': True,
8005dc68
S
171 }, {
172 'url': 'https://www.ok.ru/live/484531969818',
173 'only_matching': True,
608c738c
G
174 }, {
175 'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#',
176 'only_matching': True,
15870747 177 }, {
178 # Paid video
179 'url': 'https://ok.ru/video/954886983203',
180 'only_matching': True,
8196182a 181 }, {
182 'url': 'https://ok.ru/videoembed/2932705602075',
183 'info_dict': {
184 'id': '2932705602075',
185 'ext': 'mp4',
186 'thumbnail': 'https://i.mycdn.me/videoPreview?id=1369902483995&type=37&idx=2&tkn=fqlnoQD_xwq5ovIlKfgNyU08qmM&fn=external_8',
187 'title': 'Boosty для тебя!',
188 'uploader_id': '597811038747',
189 'like_count': 0,
190 'duration': 35,
191 },
192 }]
193
194 _WEBPAGE_TESTS = [{
195 'url': 'https://boosty.to/ikakprosto/posts/56cedaca-b56a-4dfd-b3ed-98c79cfa0167',
196 'info_dict': {
197 'id': '3950343629563',
198 'ext': 'mp4',
199 'thumbnail': 'https://i.mycdn.me/videoPreview?id=2776238394107&type=37&idx=11&tkn=F3ejkUFcpuI4DnMRxrDGcH5YcmM&fn=external_8',
200 'title': 'Заяц Бусти.mp4',
201 'uploader_id': '571368965883',
202 'like_count': 0,
203 'duration': 10444,
204 },
4ffbf778
S
205 }]
206
@classmethod
def _extract_embed_urls(cls, url, webpage):
    """Yield the embed URLs found in ``webpage``, tagged with their referrer.

    The candidate URLs come from the parent class's ``_extract_embed_urls``.
    Each one is passed through ``smuggle_url`` with ``{'referrer': url}`` so
    that the embedding page's URL travels along with the embed URL.
    """
    # Zero-argument super() must be called here, not inside the generator
    # expression below, where it would not have access to ``cls``.
    embed_urls = super()._extract_embed_urls(url, webpage)
    # A new payload dict is built for every URL, exactly as before.
    yield from (
        smuggle_url(embed_url, {'referrer': url})
        for embed_url in embed_urls)
211
def _real_extract(self, url):
    """Extract from the desktop page, falling back to the mobile page.

    The mobile extraction is attempted only when the desktop extraction
    raises ``ExtractorError``. If the mobile attempt also raises
    ``ExtractorError``, the *desktop* error is the one re-raised.
    """
    try:
        result = self._extract_desktop(url)
    except ExtractorError as desktop_error:
        try:
            result = self._extract_mobile(url)
        except ExtractorError:
            # error message of desktop webpage is in English
            raise desktop_error
    return result
221
def _extract_desktop(self, url):
    """Extract video information from the desktop (ok.ru) player page.

    Steps, in order:
      1. Read an optional ``fromTime`` query parameter as the start time.
      2. Unsmuggle the URL; a smuggled ``referrer`` is sent as the
         ``Referer`` header when downloading the page.
      3. Download ``https://ok.ru/video/<id>`` (or ``videoembed`` when the
         URL matched the ``embed`` group) and look for the stub error text.
      4. Parse the player's ``data-options`` JSON and obtain the metadata,
         either inline (``flashvars['metadata']``) or by POSTing to
         ``flashvars['metadataUrl']``.
      5. Either delegate to another extractor (external player,
         ``OPEN_GRAPH`` / ``USER_YOUTUBE`` providers) or build the formats
         list from the metadata.

    Returns:
        An info dict, or the result of ``self.url_result`` for an external
        player.

    Raises:
        ExtractorError: when the page shows an error stub, or when a video
            that is handled here (not delegated) has no title.
    """
    start_time = int_or_none(compat_parse_qs(
        compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])

    url, smuggled = unsmuggle_url(url, {})
    video_id, is_embed = self._match_valid_url(url).group('id', 'embed')
    mode = 'videoembed' if is_embed else 'video'
    referrer = smuggled.get('referrer')

    webpage = self._download_webpage(
        f'https://ok.ru/{mode}/{video_id}', video_id,
        note='Downloading desktop webpage',
        headers={'Referer': referrer} if referrer else {})

    error = self._search_regex(
        r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
        webpage, 'error', default=None)
    # Direct link from boosty: an embed page requested without a referrer
    # reports this specific error, so retry once with a boosty referrer.
    # The retry carries a referrer, so it cannot recurse again.
    if (error == 'The author of this video has not been found or is blocked'
            and not referrer and mode == 'videoembed'):
        return self._extract_desktop(smuggle_url(url, {'referrer': 'https://boosty.to'}))
    elif error:
        raise ExtractorError(error, expected=True)

    player = self._parse_json(
        unescapeHTML(self._search_regex(
            r'data-options=(?P<quote>["\'])(?P<player>{.+?%s.+?})(?P=quote)' % video_id,
            webpage, 'player', group='player')),
        video_id)

    # embedded external player
    if player.get('isExternalPlayer') and player.get('url'):
        return self.url_result(player['url'])

    flashvars = player['flashvars']

    metadata = flashvars.get('metadata')
    if metadata:
        metadata = self._parse_json(metadata, video_id)
    else:
        data = {}
        st_location = flashvars.get('location')
        if st_location:
            data['st.location'] = st_location
        metadata = self._download_json(
            compat_urllib_parse_unquote(flashvars['metadataUrl']),
            video_id, 'Downloading metadata JSON',
            data=urlencode_postdata(data))

    movie = metadata['movie']

    # Some embedded videos may not contain title in movie dict (e.g.
    # http://ok.ru/video/62036049272859-0) thus we allow missing title
    # here and it's going to be extracted later by an extractor that
    # will process the actual embed.
    provider = metadata.get('provider')
    title = movie['title'] if provider == 'UPLOADED_ODKL' else movie.get('title')

    thumbnail = movie.get('poster')
    duration = int_or_none(movie.get('duration'))

    # ``or {}`` also covers an explicit null author, not just a missing key.
    author = metadata.get('author') or {}
    uploader_id = author.get('id')
    uploader = author.get('name')

    upload_date = unified_strdate(self._html_search_meta(
        'ya:ovs:upload_date', webpage, 'upload date', default=None))

    # age_limit stays None when the page carries no ya:ovs:adult meta tag.
    age_limit = None
    adult = self._html_search_meta(
        'ya:ovs:adult', webpage, 'age limit', default=None)
    if adult:
        age_limit = 18 if adult == 'true' else 0

    like_count = int_or_none(metadata.get('likeCount'))

    info = {
        'id': video_id,
        'title': title,
        'thumbnail': thumbnail,
        'duration': duration,
        'upload_date': upload_date,
        'uploader': uploader,
        'uploader_id': uploader_id,
        'like_count': like_count,
        'age_limit': age_limit,
        'start_time': start_time,
    }

    # pladform (OPEN_GRAPH) and YouTube (USER_YOUTUBE) videos are handed
    # over to whichever extractor handles movie['contentId'].
    if provider in ('OPEN_GRAPH', 'USER_YOUTUBE'):
        info.update({
            '_type': 'url_transparent',
            'url': movie['contentId'],
        })
        return info

    # From here on the video is handled by this extractor, so a title is
    # required. Raise explicitly instead of using ``assert``, which is
    # removed when Python runs with -O.
    if not title:
        raise ExtractorError('Unable to extract video title')

    quality = qualities(('4', '0', '1', '2', '3', '5', '6', '7'))

    # Tolerate a missing/null 'videos' list and skip entries without a URL
    # rather than failing with KeyError.
    formats = [{
        'url': f['url'],
        'ext': 'mp4',
        'format_id': f.get('name'),
    } for f in (metadata.get('videos') or []) if f.get('url')]

    m3u8_url = metadata.get('hlsManifestUrl')
    if m3u8_url:
        formats.extend(self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', 'm3u8_native',
            m3u8_id='hls', fatal=False))

    dash_manifest = metadata.get('metadataEmbedded')
    if dash_manifest:
        formats.extend(self._parse_mpd_formats(
            compat_etree_fromstring(dash_manifest), 'mpd'))

    # Rank the formats gathered so far by the single-digit ``type`` value
    # found in their URL. The live formats appended below are not ranked.
    for fmt in formats:
        fmt_type = self._search_regex(
            r'\btype[/=](\d)', fmt['url'],
            'format type', default=None)
        if fmt_type:
            fmt['quality'] = quality(fmt_type)

    # Live formats
    m3u8_url = metadata.get('hlsMasterPlaylistUrl')
    if m3u8_url:
        formats.extend(self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
    rtmp_url = metadata.get('rtmpUrl')
    if rtmp_url:
        formats.append({
            'url': rtmp_url,
            'format_id': 'rtmp',
            'ext': 'flv',
        })

    if not formats and metadata.get('paymentInfo'):
        self.raise_no_formats('This video is paid, subscribe to download it', expected=True)

    info['formats'] = formats
    return info
d984a98d
THD
375
def _extract_mobile(self, url):
    """Extract video information from the mobile (m.ok.ru) page.

    Downloads the mobile page for the video id matched from ``url``, raises
    ``ExtractorError`` if the page contains the "empty" error block, and
    otherwise reads the JSON stored in the page's ``data-video`` attribute.

    Returns:
        An info dict with a single format whose ``format_id`` is
        ``'mobile'``.
    """
    video_id = self._match_id(url)

    mobile_page = self._download_webpage(
        f'http://m.ok.ru/video/{video_id}', video_id,
        note='Downloading mobile webpage')

    stub_message = self._search_regex(
        r'видео</a>\s*<div\s+class="empty">(.+?)</div>',
        mobile_page, 'error', default=None)
    if stub_message:
        raise ExtractorError(stub_message, expected=True)

    raw_video_data = self._search_regex(
        r'data-video="(.+?)"', mobile_page, 'json data')
    # Fall back to an empty dict so the lookups below yield None
    # when the parsed JSON is empty/falsy.
    video_data = self._parse_json(unescapeHTML(raw_video_data), video_id) or {}

    mobile_format = {
        'format_id': 'mobile',
        'url': video_data.get('videoSrc'),
        'ext': 'mp4',
    }

    return {
        'id': video_id,
        'title': video_data.get('videoName'),
        # videoDuration is divided by 1000 (scale=1000) before being reported
        'duration': float_or_none(video_data.get('videoDuration'), scale=1000),
        'thumbnail': video_data.get('videoPosterSrc'),
        'formats': [mobile_format],
    }