]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/odnoklassniki.py
[cleanup] Add more ruff rules (#10149)
[yt-dlp.git] / yt_dlp / extractor / odnoklassniki.py
CommitLineData
1a2eb5bd 1import urllib.parse
2
4ffbf778 3from .common import InfoExtractor
add96eb9 4from ..compat import compat_etree_fromstring
3d2623a8 5from ..networking import HEADRequest
4ffbf778 6from ..utils import (
1806a754 7 ExtractorError,
d984a98d 8 float_or_none,
4ffbf778
S
9 int_or_none,
10 qualities,
8196182a 11 smuggle_url,
b23b503e 12 traverse_obj,
372744c5 13 unescapeHTML,
8196182a 14 unified_strdate,
15 unsmuggle_url,
1a2eb5bd 16 url_or_none,
a3474aa5 17 urlencode_postdata,
4ffbf778
S
18)
19
20
21class OdnoklassnikiIE(InfoExtractor):
d04ca976
S
22 _VALID_URL = r'''(?x)
23 https?://
24 (?:(?:www|m|mobile)\.)?
25 (?:odnoklassniki|ok)\.ru/
26 (?:
8196182a 27 video(?P<embed>embed)?/|
d04ca976
S
28 web-api/video/moviePlayer/|
29 live/|
30 dk\?.*?st\.mvId=
31 )
32 (?P<id>[\d-]+)
33 '''
bfd973ec 34 _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1']
4ffbf778 35 _TESTS = [{
b8b3f456
K
36 'note': 'Coub embedded',
37 'url': 'http://ok.ru/video/1484130554189',
38 'info_dict': {
39 'id': '1keok9',
40 'ext': 'mp4',
41 'timestamp': 1545580896,
42 'view_count': int,
1a2eb5bd 43 'thumbnail': r're:^https?://.*\.jpg$',
b8b3f456
K
44 'title': 'Народная забава',
45 'uploader': 'Nevata',
46 'upload_date': '20181223',
47 'age_limit': 0,
48 'uploader_id': 'nevata.s',
49 'like_count': int,
50 'duration': 8.08,
51 'repost_count': int,
52 },
53 }, {
54 'note': 'vk.com embedded',
55 'url': 'https://ok.ru/video/3568183087575',
56 'info_dict': {
57 'id': '-165101755_456243749',
58 'ext': 'mp4',
59 'uploader_id': '-165101755',
60 'duration': 132,
61 'timestamp': 1642869935,
62 'upload_date': '20220122',
63 'thumbnail': str,
64 'title': str,
65 'uploader': str,
66 },
1a2eb5bd 67 'skip': 'vk extractor error',
b8b3f456 68 }, {
1a2eb5bd 69 # metadata in JSON, webm_dash with Firefox UA
4ffbf778 70 'url': 'http://ok.ru/video/20079905452',
1a2eb5bd 71 'md5': '8f477d8931c531374a3e36daec617b2c',
4ffbf778
S
72 'info_dict': {
73 'id': '20079905452',
1a2eb5bd 74 'ext': 'webm',
4ffbf778 75 'title': 'Культура меняет нас (прекрасный ролик!))',
8196182a 76 'thumbnail': str,
4ffbf778 77 'duration': 100,
887e9bc7 78 'upload_date': '20141207',
4ffbf778
S
79 'uploader_id': '330537914540',
80 'uploader': 'Виталий Добровольский',
81 'like_count': int,
9f2e7c2f 82 'age_limit': 0,
c6bbdadd 83 },
1a2eb5bd 84 'params': {
85 'format': 'bv[ext=webm]',
86 'http_headers': {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0'},
87 },
c6bbdadd
S
88 }, {
89 # metadataUrl
c9fd5306 90 'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
1a2eb5bd 91 'md5': '2bae2f58eefe1b3d26f3926c4a64d2f3',
c6bbdadd
S
92 'info_dict': {
93 'id': '63567059965189-0',
94 'ext': 'mp4',
95 'title': 'Девушка без комплексов ...',
8196182a 96 'thumbnail': str,
c6bbdadd 97 'duration': 191,
887e9bc7 98 'upload_date': '20150518',
c6bbdadd 99 'uploader_id': '534380003155',
887e9bc7 100 'uploader': '☭ Андрей Мещанинов ☭',
c6bbdadd 101 'like_count': int,
9f2e7c2f 102 'age_limit': 0,
c9fd5306 103 'start_time': 5,
4ffbf778 104 },
1a2eb5bd 105 'params': {'skip_download': 'm3u8'},
88720ed0
S
106 }, {
107 # YouTube embed (metadataUrl, provider == USER_YOUTUBE)
8196182a 108 'url': 'https://ok.ru/video/3952212382174',
1a2eb5bd 109 'md5': '5fb5f83ce16cb212d6bf887282b5da53',
88720ed0 110 'info_dict': {
8196182a 111 'id': '5axVgHHDBvU',
88720ed0 112 'ext': 'mp4',
8196182a 113 'title': 'Youtube-dl 101: What is it and HOW to use it! Full Download Walkthrough and Guide',
114 'description': 'md5:b57209eeb9d5c2f20c984dfb58862097',
115 'uploader': 'Lod Mer',
116 'uploader_id': '575186401502',
117 'duration': 1529,
88720ed0 118 'age_limit': 0,
8196182a 119 'upload_date': '20210405',
120 'comment_count': int,
121 'live_status': 'not_live',
122 'view_count': int,
123 'thumbnail': 'https://i.mycdn.me/i?r=AEHujHvw2RjEbemUCNEorZbxYpb_p_9AcN2FmGik64Krkcmz37YtlY093oAM5-HIEAt7Zi9s0CiBOSDmbngC-I-k&fn=external_8',
1a2eb5bd 124 'uploader_url': 'https://www.youtube.com/@MrKewlkid94',
8196182a 125 'channel_follower_count': int,
126 'tags': ['youtube-dl', 'youtube playlists', 'download videos', 'download audio'],
127 'channel_id': 'UCVGtvURtEURYHtJFUegdSug',
128 'like_count': int,
129 'availability': 'public',
130 'channel_url': 'https://www.youtube.com/channel/UCVGtvURtEURYHtJFUegdSug',
131 'categories': ['Education'],
132 'playable_in_embed': True,
133 'channel': 'BornToReact',
88720ed0 134 },
749b0046
S
135 }, {
136 # YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
137 'url': 'http://ok.ru/video/62036049272859-0',
138 'info_dict': {
139 'id': '62036049272859-0',
140 'ext': 'mp4',
141 'title': 'МУЗЫКА ДОЖДЯ .',
142 'description': 'md5:6f1867132bd96e33bf53eda1091e8ed0',
143 'upload_date': '20120106',
144 'uploader_id': '473534735899',
145 'uploader': 'МARINA D',
146 'age_limit': 0,
147 },
148 'params': {
149 'skip_download': True,
150 },
58f6ab72 151 'skip': 'Video has not been found',
d984a98d
THD
152 }, {
153 'note': 'Only available in mobile webpage',
154 'url': 'https://m.ok.ru/video/2361249957145',
155 'info_dict': {
156 'id': '2361249957145',
8196182a 157 'ext': 'mp4',
d984a98d
THD
158 'title': 'Быковское крещение',
159 'duration': 3038.181,
1a2eb5bd 160 'thumbnail': r're:^https?://i\.mycdn\.me/videoPreview\?.+',
d984a98d 161 },
b23b503e 162 }, {
163 'note': 'subtitles',
164 'url': 'https://ok.ru/video/4249587550747',
165 'info_dict': {
166 'id': '4249587550747',
167 'ext': 'mp4',
168 'title': 'Small Country An African Childhood (2020) (1080p) +subtitle',
169 'uploader': 'Sunflower Movies',
170 'uploader_id': '595802161179',
171 'upload_date': '20220816',
172 'duration': 6728,
173 'age_limit': 0,
174 'thumbnail': r're:^https?://i\.mycdn\.me/videoPreview\?.+',
175 'like_count': int,
176 'subtitles': dict,
177 },
178 'params': {
179 'skip_download': True,
180 },
4ffbf778
S
181 }, {
182 'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
183 'only_matching': True,
cdc8d0c3
YCH
184 }, {
185 'url': 'http://www.ok.ru/video/20648036891',
186 'only_matching': True,
d762f86e
S
187 }, {
188 'url': 'http://www.ok.ru/videoembed/20648036891',
189 'only_matching': True,
10e6ed93
S
190 }, {
191 'url': 'http://m.ok.ru/video/20079905452',
192 'only_matching': True,
193 }, {
194 'url': 'http://mobile.ok.ru/video/20079905452',
195 'only_matching': True,
8005dc68
S
196 }, {
197 'url': 'https://www.ok.ru/live/484531969818',
198 'only_matching': True,
608c738c
G
199 }, {
200 'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#',
201 'only_matching': True,
15870747 202 }, {
203 # Paid video
204 'url': 'https://ok.ru/video/954886983203',
205 'only_matching': True,
8196182a 206 }, {
207 'url': 'https://ok.ru/videoembed/2932705602075',
208 'info_dict': {
209 'id': '2932705602075',
210 'ext': 'mp4',
211 'thumbnail': 'https://i.mycdn.me/videoPreview?id=1369902483995&type=37&idx=2&tkn=fqlnoQD_xwq5ovIlKfgNyU08qmM&fn=external_8',
212 'title': 'Boosty для тебя!',
213 'uploader_id': '597811038747',
214 'like_count': 0,
215 'duration': 35,
216 },
217 }]
218
219 _WEBPAGE_TESTS = [{
220 'url': 'https://boosty.to/ikakprosto/posts/56cedaca-b56a-4dfd-b3ed-98c79cfa0167',
221 'info_dict': {
222 'id': '3950343629563',
223 'ext': 'mp4',
224 'thumbnail': 'https://i.mycdn.me/videoPreview?id=2776238394107&type=37&idx=11&tkn=F3ejkUFcpuI4DnMRxrDGcH5YcmM&fn=external_8',
225 'title': 'Заяц Бусти.mp4',
226 'uploader_id': '571368965883',
227 'like_count': 0,
228 'duration': 10444,
229 },
b23b503e 230 'skip': 'Site no longer embeds',
4ffbf778
S
231 }]
232
1a2eb5bd 233 def _clear_cookies(self, cdn_url):
234 # Direct http downloads will fail if CDN cookies are set
235 # so we need to reset them after each format extraction
ad54c913 236 self.cookiejar.clear(domain='.mycdn.me')
237 self.cookiejar.clear(domain=urllib.parse.urlparse(cdn_url).hostname)
1a2eb5bd 238
@classmethod
def _extract_embed_urls(cls, url, webpage):
    """Yield the embed URLs found by the parent implementation, each
    smuggled with the embedding page's URL as ``referrer``.

    :param url: URL of the page that contains the embeds.
    :param webpage: contents of that page.
    """
    embed_urls = super()._extract_embed_urls(url, webpage)
    yield from (
        # a fresh payload dict is built for every embed URL
        smuggle_url(embed_url, {'referrer': url})
        for embed_url in embed_urls)
243
4ffbf778 244 def _real_extract(self, url):
d984a98d
THD
245 try:
246 return self._extract_desktop(url)
247 except ExtractorError as e:
248 try:
249 return self._extract_mobile(url)
250 except ExtractorError:
251 # error message of desktop webpage is in English
252 raise e
253
def _extract_desktop(self, url):
    """Extract video information from the desktop ok.ru page.

    :param url: video URL, optionally smuggled with a ``referrer`` that is
        sent as the ``Referer`` header when downloading the page.
    :returns: an info dict with formats, a ``url_transparent`` info dict
        pointing at ``movie['contentId']`` for the ``OPEN_GRAPH`` and
        ``USER_YOUTUBE`` providers, or a URL result when the page embeds
        an external player.
    :raises ExtractorError: when the page shows an error stub (the stub
        text is used as the message), or when no title is available for
        a video whose formats are extracted here.
    """
    start_time = int_or_none(urllib.parse.parse_qs(
        urllib.parse.urlparse(url).query).get('fromTime', [None])[0])

    url, smuggled = unsmuggle_url(url, {})
    video_id, is_embed = self._match_valid_url(url).group('id', 'embed')
    mode = 'videoembed' if is_embed else 'video'

    webpage = self._download_webpage(
        f'https://ok.ru/{mode}/{video_id}', video_id,
        note='Downloading desktop webpage',
        headers={'Referer': smuggled['referrer']} if smuggled.get('referrer') else {})

    error = self._search_regex(
        r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
        webpage, 'error', default=None)
    # Direct link from boosty: retry once with a boosty referrer. The retry
    # carries a referrer, so this branch cannot recurse a second time.
    if (error == 'The author of this video has not been found or is blocked'
            and not smuggled.get('referrer') and mode == 'videoembed'):
        return self._extract_desktop(smuggle_url(url, {'referrer': 'https://boosty.to'}))
    elif error:
        raise ExtractorError(error, expected=True)

    player = self._parse_json(
        unescapeHTML(self._search_regex(
            rf'data-options=(?P<quote>["\'])(?P<player>{{.+?{video_id}.+?}})(?P=quote)',
            webpage, 'player', group='player')),
        video_id)

    # embedded external player
    if player.get('isExternalPlayer') and player.get('url'):
        return self.url_result(player['url'])

    flashvars = player['flashvars']

    # Metadata is either inlined as a JSON string or has to be fetched
    # from `metadataUrl` with a POST request.
    metadata = flashvars.get('metadata')
    if metadata:
        metadata = self._parse_json(metadata, video_id)
    else:
        data = {}
        st_location = flashvars.get('location')
        if st_location:
            data['st.location'] = st_location
        metadata = self._download_json(
            urllib.parse.unquote(flashvars['metadataUrl']),
            video_id, 'Downloading metadata JSON',
            data=urlencode_postdata(data))

    movie = metadata['movie']

    # Some embedded videos may not contain title in movie dict (e.g.
    # http://ok.ru/video/62036049272859-0) thus we allow missing title
    # here and it's going to be extracted later by an extractor that
    # will process the actual embed.
    provider = metadata.get('provider')
    title = movie['title'] if provider == 'UPLOADED_ODKL' else movie.get('title')

    thumbnail = movie.get('poster')
    duration = int_or_none(movie.get('duration'))

    # `author` may be absent or null in the metadata
    author = metadata.get('author') or {}
    uploader_id = author.get('id')
    uploader = author.get('name')

    upload_date = unified_strdate(self._html_search_meta(
        'ya:ovs:upload_date', webpage, 'upload date', default=None))

    age_limit = None
    adult = self._html_search_meta(
        'ya:ovs:adult', webpage, 'age limit', default=None)
    if adult:
        age_limit = 18 if adult == 'true' else 0

    like_count = int_or_none(metadata.get('likeCount'))

    subtitles = {}
    for sub in traverse_obj(metadata, ('movie', 'subtitleTracks', ...), expected_type=dict):
        sub_url = sub.get('url')
        if not sub_url:
            continue
        # tracks without a language are filed under 'en'
        subtitles.setdefault(sub.get('language') or 'en', []).append({
            'url': sub_url,
            'ext': 'vtt',
        })

    info = {
        'id': video_id,
        'title': title,
        'thumbnail': thumbnail,
        'duration': duration,
        'upload_date': upload_date,
        'uploader': uploader,
        'uploader_id': uploader_id,
        'like_count': like_count,
        'age_limit': age_limit,
        'start_time': start_time,
        'subtitles': subtitles,
    }

    # Externally hosted videos (OPEN_GRAPH is used for pladform,
    # USER_YOUTUBE for YouTube): delegate to the extractor handling
    # `contentId` while keeping the metadata collected above.
    if provider in ('OPEN_GRAPH', 'USER_YOUTUBE'):
        info.update({
            '_type': 'url_transparent',
            'url': movie['contentId'],
        })
        return info

    # From here on the formats are extracted by this method, so a title is
    # required. Raised explicitly (rather than asserted) so the check is
    # not stripped when Python runs with -O.
    if not title:
        raise ExtractorError('Unable to extract video title')

    # Ranks the single-digit `type` value found in format URLs by its
    # position in this tuple.
    quality = qualities(('4', '0', '1', '2', '3', '5', '6', '7'))

    formats = [{
        'url': f['url'],
        'ext': 'mp4',
        'format_id': f.get('name'),
    } for f in traverse_obj(metadata, ('videos', lambda _, v: url_or_none(v['url'])))]

    m3u8_url = traverse_obj(metadata, 'hlsManifestUrl', 'ondemandHls')
    if m3u8_url:
        formats.extend(self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', 'm3u8_native',
            m3u8_id='hls', fatal=False))
        self._clear_cookies(m3u8_url)

    for mpd_id, mpd_key in [('dash', 'ondemandDash'), ('webm', 'metadataWebmUrl')]:
        mpd_url = metadata.get(mpd_key)
        if mpd_url:
            formats.extend(self._extract_mpd_formats(
                mpd_url, video_id, mpd_id=mpd_id, fatal=False))
            self._clear_cookies(mpd_url)

    # DASH manifest embedded directly in the metadata as an XML string
    dash_manifest = metadata.get('metadataEmbedded')
    if dash_manifest:
        formats.extend(self._parse_mpd_formats(
            compat_etree_fromstring(dash_manifest), 'mpd'))

    # Only the formats collected so far get a quality rank; the live
    # formats appended below are left without one.
    for fmt in formats:
        fmt_type = self._search_regex(
            r'\btype[/=](\d)', fmt['url'],
            'format type', default=None)
        if fmt_type:
            fmt['quality'] = quality(fmt_type)

    # Live formats
    m3u8_url = metadata.get('hlsMasterPlaylistUrl')
    if m3u8_url:
        formats.extend(self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
        self._clear_cookies(m3u8_url)
    rtmp_url = metadata.get('rtmpUrl')
    if rtmp_url:
        formats.append({
            'url': rtmp_url,
            'format_id': 'rtmp',
            'ext': 'flv',
        })

    if not formats:
        payment_info = metadata.get('paymentInfo')
        if payment_info:
            self.raise_no_formats('This video is paid, subscribe to download it', expected=True)

    info['formats'] = formats
    return info
d984a98d
THD
427
def _extract_mobile(self, url):
    """Extract a single mp4 format from the mobile (m.ok.ru) page.

    :param url: video URL; only the video id is taken from it.
    :returns: an info dict with id, title, duration, thumbnail and one
        format whose URL is the final URL of a HEAD request to the
        page's ``videoSrc``.
    :raises ExtractorError: when the page shows an error stub; the stub
        text is used as the message.
    """
    video_id = self._match_id(url)

    mobile_page = self._download_webpage(
        f'http://m.ok.ru/video/{video_id}', video_id,
        note='Downloading mobile webpage')

    stub_message = self._search_regex(
        r'видео</a>\s*<div\s+class="empty">(.+?)</div>',
        mobile_page, 'error', default=None)
    if stub_message:
        raise ExtractorError(stub_message, expected=True)

    # The player data is HTML-escaped JSON stored in a `data-video` attribute
    raw_video_data = self._search_regex(
        r'data-video="(.+?)"', mobile_page, 'json data')
    video_data = self._parse_json(unescapeHTML(raw_video_data), video_id) or {}

    # Use the URL the HEAD request ends up at as the download URL
    head_request = HEADRequest(video_data['videoSrc'])
    head_response = self._request_webpage(
        head_request, video_id, 'Requesting download URL')
    redirect_url = head_response.url
    self._clear_cookies(redirect_url)

    mobile_format = {
        'format_id': 'mobile',
        'url': redirect_url,
        'ext': 'mp4',
    }
    return {
        'id': video_id,
        'title': video_data.get('videoName'),
        # `videoDuration` is divided by 1000 to obtain the duration
        'duration': float_or_none(video_data.get('videoDuration'), scale=1000),
        'thumbnail': video_data.get('videoPosterSrc'),
        'formats': [mobile_format],
    }