[yt-dlp.git] / yt_dlp / extractor / odnoklassniki.py

from .common import InfoExtractor
from ..compat import (
    compat_etree_fromstring,
    compat_parse_qs,
    compat_urllib_parse_unquote,
    compat_urllib_parse_urlparse,
)
from ..utils import (
    ExtractorError,
    float_or_none,
    int_or_none,
    qualities,
    smuggle_url,
    traverse_obj,
    unescapeHTML,
    unified_strdate,
    unsmuggle_url,
    urlencode_postdata,
)


class OdnoklassnikiIE(InfoExtractor):
    # Verbose-mode pattern. Accepts http(s) URLs on odnoklassniki.ru / ok.ru
    # (optionally under the www, m or mobile subdomain) whose path is one of
    # video/, videoembed/, web-api/video/moviePlayer/, live/ or a "dk?" query
    # carrying "st.mvId=". The run of digits and dashes that follows is
    # captured as "id"; the "embed" group only matches for videoembed/ URLs.
    _VALID_URL = r'''(?x)
                https?://
                    (?:(?:www|m|mobile)\.)?
                    (?:odnoklassniki|ok)\.ru/
                    (?:
                        video(?P<embed>embed)?/|
                        web-api/video/moviePlayer/|
                        live/|
                        dk\?.*?st\.mvId=
                    )
                    (?P<id>[\d-]+)
                '''
    # Finds <iframe> tags whose src points at an odnoklassniki.ru / ok.ru
    # videoembed/ URL; the scheme is optional (protocol-relative src allowed).
    _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1']
    _TESTS = [{
        'note': 'Coub embedded',
        'url': 'http://ok.ru/video/1484130554189',
        'info_dict': {
            'id': '1keok9',
            'ext': 'mp4',
            'timestamp': 1545580896,
            'view_count': int,
            'thumbnail': 'https://coub-attachments.akamaized.net/coub_storage/coub/simple/cw_image/c5ac87553bd/608e806a1239c210ab692/1545580913_00026.jpg',
            'title': 'Народная забава',
            'uploader': 'Nevata',
            'upload_date': '20181223',
            'age_limit': 0,
            'uploader_id': 'nevata.s',
            'like_count': int,
            'duration': 8.08,
            'repost_count': int,
        },
    }, {
        'note': 'vk.com embedded',
        'url': 'https://ok.ru/video/3568183087575',
        'info_dict': {
            'id': '-165101755_456243749',
            'ext': 'mp4',
            'uploader_id': '-165101755',
            'duration': 132,
            'timestamp': 1642869935,
            'upload_date': '20220122',
            'thumbnail': str,
            'title': str,
            'uploader': str,
        },
    }, {
        # metadata in JSON
        'url': 'http://ok.ru/video/20079905452',
        'md5': '5d2b64756e2af296e3b383a0bc02a6aa',
        'info_dict': {
            'id': '20079905452',
            'ext': 'mp4',
            'title': 'Культура меняет нас (прекрасный ролик!))',
            'thumbnail': str,
            'duration': 100,
            'upload_date': '20141207',
            'uploader_id': '330537914540',
            'uploader': 'Виталий Добровольский',
            'like_count': int,
            'age_limit': 0,
        },
    }, {
        # metadataUrl
        'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
        'md5': 'f8c951122516af72e6e6ffdd3c41103b',
        'info_dict': {
            'id': '63567059965189-0',
            'ext': 'mp4',
            'title': 'Девушка без комплексов ...',
            'thumbnail': str,
            'duration': 191,
            'upload_date': '20150518',
            'uploader_id': '534380003155',
            'uploader': '☭ Андрей Мещанинов ☭',
            'like_count': int,
            'age_limit': 0,
            'start_time': 5,
        },
    }, {
        # YouTube embed (metadataUrl, provider == USER_YOUTUBE)
        'url': 'https://ok.ru/video/3952212382174',
        'md5': '91749d0bd20763a28d083fa335bbd37a',
        'info_dict': {
            'id': '5axVgHHDBvU',
            'ext': 'mp4',
            'title': 'Youtube-dl 101: What is it and HOW to use it! Full Download Walkthrough and Guide',
            'description': 'md5:b57209eeb9d5c2f20c984dfb58862097',
            'uploader': 'Lod Mer',
            'uploader_id': '575186401502',
            'duration': 1529,
            'age_limit': 0,
            'upload_date': '20210405',
            'comment_count': int,
            'live_status': 'not_live',
            'view_count': int,
            'thumbnail': 'https://i.mycdn.me/i?r=AEHujHvw2RjEbemUCNEorZbxYpb_p_9AcN2FmGik64Krkcmz37YtlY093oAM5-HIEAt7Zi9s0CiBOSDmbngC-I-k&fn=external_8',
            'uploader_url': 'http://www.youtube.com/user/MrKewlkid94',
            'channel_follower_count': int,
            'tags': ['youtube-dl', 'youtube playlists', 'download videos', 'download audio'],
            'channel_id': 'UCVGtvURtEURYHtJFUegdSug',
            'like_count': int,
            'availability': 'public',
            'channel_url': 'https://www.youtube.com/channel/UCVGtvURtEURYHtJFUegdSug',
            'categories': ['Education'],
            'playable_in_embed': True,
            'channel': 'BornToReact',
        },
    }, {
        # YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
        'url': 'http://ok.ru/video/62036049272859-0',
        'info_dict': {
            'id': '62036049272859-0',
            'ext': 'mp4',
            'title': 'МУЗЫКА     ДОЖДЯ .',
            'description': 'md5:6f1867132bd96e33bf53eda1091e8ed0',
            'upload_date': '20120106',
            'uploader_id': '473534735899',
            'uploader': 'МARINA D',
            'age_limit': 0,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Video has not been found',
    }, {
        # TODO: HTTP Error 400: Bad Request, it only works if there's no cookies when downloading
        'note': 'Only available in mobile webpage',
        'url': 'https://m.ok.ru/video/2361249957145',
        'info_dict': {
            'id': '2361249957145',
            'ext': 'mp4',
            'title': 'Быковское крещение',
            'duration': 3038.181,
        },
        'skip': 'HTTP Error 400',
    }, {
        'note': 'subtitles',
        'url': 'https://ok.ru/video/4249587550747',
        'info_dict': {
            'id': '4249587550747',
            'ext': 'mp4',
            'title': 'Small Country An African Childhood (2020) (1080p) +subtitle',
            'uploader': 'Sunflower Movies',
            'uploader_id': '595802161179',
            'upload_date': '20220816',
            'duration': 6728,
            'age_limit': 0,
            'thumbnail': r're:^https?://i\.mycdn\.me/videoPreview\?.+',
            'like_count': int,
            'subtitles': dict,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
        'only_matching': True,
    }, {
        'url': 'http://www.ok.ru/video/20648036891',
        'only_matching': True,
    }, {
        'url': 'http://www.ok.ru/videoembed/20648036891',
        'only_matching': True,
    }, {
        'url': 'http://m.ok.ru/video/20079905452',
        'only_matching': True,
    }, {
        'url': 'http://mobile.ok.ru/video/20079905452',
        'only_matching': True,
    }, {
        'url': 'https://www.ok.ru/live/484531969818',
        'only_matching': True,
    }, {
        'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#',
        'only_matching': True,
    }, {
        # Paid video
        'url': 'https://ok.ru/video/954886983203',
        'only_matching': True,
    }, {
        'url': 'https://ok.ru/videoembed/2932705602075',
        'info_dict': {
            'id': '2932705602075',
            'ext': 'mp4',
            'thumbnail': 'https://i.mycdn.me/videoPreview?id=1369902483995&type=37&idx=2&tkn=fqlnoQD_xwq5ovIlKfgNyU08qmM&fn=external_8',
            'title': 'Boosty для тебя!',
            'uploader_id': '597811038747',
            'like_count': 0,
            'duration': 35,
        },
    }]

    _WEBPAGE_TESTS = [{
        'url': 'https://boosty.to/ikakprosto/posts/56cedaca-b56a-4dfd-b3ed-98c79cfa0167',
        'info_dict': {
            'id': '3950343629563',
            'ext': 'mp4',
            'thumbnail': 'https://i.mycdn.me/videoPreview?id=2776238394107&type=37&idx=11&tkn=F3ejkUFcpuI4DnMRxrDGcH5YcmM&fn=external_8',
            'title': 'Заяц Бусти.mp4',
            'uploader_id': '571368965883',
            'like_count': 0,
            'duration': 10444,
        },
        'skip': 'Site no longer embeds',
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield the parent class's embed URLs with the embedding page smuggled in.

        Every URL produced by the inherited implementation is passed through
        ``smuggle_url`` with ``url`` stored under the ``referrer`` key.
        """
        found = super()._extract_embed_urls(url, webpage)
        # A new dict is built for every URL rather than shared between calls
        yield from (smuggle_url(embed_url, {'referrer': url}) for embed_url in found)

    def _real_extract(self, url):
        """Extract via the desktop page, retrying with the mobile page on failure.

        If the desktop attempt raises ExtractorError the mobile page is tried;
        if that raises ExtractorError as well, the desktop error is the one
        that propagates.
        """
        try:
            info = self._extract_desktop(url)
        except ExtractorError as desktop_error:
            try:
                info = self._extract_mobile(url)
            except ExtractorError:
                # The desktop page reports its errors in English, so that is
                # the error surfaced when both attempts fail
                raise desktop_error
        return info

    def _extract_desktop(self, url):
        """Extract video information from the desktop ok.ru page.

        ``url`` may carry smuggled data; a smuggled ``referrer`` is sent as
        the ``Referer`` header when downloading the page. A ``fromTime`` query
        parameter, if present, becomes ``start_time``.

        Returns either a ``url_result`` for an external player, a
        ``url_transparent`` dict for the OPEN_GRAPH / USER_YOUTUBE providers,
        or a full info dict including ``formats``.

        Raises ExtractorError when the page shows an error stub or when no
        title is available for a video that is handled by this extractor.
        """
        start_time = int_or_none(compat_parse_qs(
            compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])

        url, smuggled = unsmuggle_url(url, {})
        video_id, is_embed = self._match_valid_url(url).group('id', 'embed')
        mode = 'videoembed' if is_embed else 'video'
        referrer = smuggled.get('referrer')

        webpage = self._download_webpage(
            f'https://ok.ru/{mode}/{video_id}', video_id,
            note='Downloading desktop webpage',
            headers={'Referer': referrer} if referrer else {})

        error = self._search_regex(
            r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
            webpage, 'error', default=None)
        # Direct link from boosty: retry once, pretending to come from boosty.to.
        # The retry carries a referrer, so this branch cannot recurse again.
        if (error == 'The author of this video has not been found or is blocked'
                and not referrer and mode == 'videoembed'):
            return self._extract_desktop(smuggle_url(url, {'referrer': 'https://boosty.to'}))
        elif error:
            raise ExtractorError(error, expected=True)

        player = self._parse_json(
            unescapeHTML(self._search_regex(
                r'data-options=(?P<quote>["\'])(?P<player>{.+?%s.+?})(?P=quote)' % video_id,
                webpage, 'player', group='player')),
            video_id)

        # embedded external player
        if player.get('isExternalPlayer') and player.get('url'):
            return self.url_result(player['url'])

        flashvars = player['flashvars']

        # Metadata is either inlined as a JSON string or has to be fetched
        # with a POST request to metadataUrl
        metadata = flashvars.get('metadata')
        if metadata:
            metadata = self._parse_json(metadata, video_id)
        else:
            data = {}
            st_location = flashvars.get('location')
            if st_location:
                data['st.location'] = st_location
            metadata = self._download_json(
                compat_urllib_parse_unquote(flashvars['metadataUrl']),
                video_id, 'Downloading metadata JSON',
                data=urlencode_postdata(data))

        movie = metadata['movie']

        # Some embedded videos may not contain title in movie dict (e.g.
        # http://ok.ru/video/62036049272859-0) thus we allow missing title
        # here and it's going to be extracted later by an extractor that
        # will process the actual embed.
        provider = metadata.get('provider')
        title = movie['title'] if provider == 'UPLOADED_ODKL' else movie.get('title')

        # "or {}" also covers an explicit null author in the JSON
        author = metadata.get('author') or {}

        age_limit = None
        adult = self._html_search_meta(
            'ya:ovs:adult', webpage, 'age limit', default=None)
        if adult:
            age_limit = 18 if adult == 'true' else 0

        subtitles = {}
        # default=[] guarantees an iterable even when no track is found
        for sub in traverse_obj(
                metadata, ('movie', 'subtitleTracks', ...), expected_type=dict, default=[]):
            sub_url = sub.get('url')
            if not sub_url:
                continue
            subtitles.setdefault(sub.get('language') or 'en', []).append({
                'url': sub_url,
                'ext': 'vtt',
            })

        info = {
            'id': video_id,
            'title': title,
            'thumbnail': movie.get('poster'),
            'duration': int_or_none(movie.get('duration')),
            'upload_date': unified_strdate(self._html_search_meta(
                'ya:ovs:upload_date', webpage, 'upload date', default=None)),
            'uploader': author.get('name'),
            'uploader_id': author.get('id'),
            'like_count': int_or_none(metadata.get('likeCount')),
            'age_limit': age_limit,
            'start_time': start_time,
            'subtitles': subtitles,
        }

        # Externally hosted content (OPEN_GRAPH is used for e.g. pladform):
        # hand over to whichever extractor handles contentId, keeping the
        # metadata collected here
        if provider in ('OPEN_GRAPH', 'USER_YOUTUBE'):
            info.update({
                '_type': 'url_transparent',
                'url': movie['contentId'],
            })
            return info

        # From here on the title is mandatory. An explicit raise is used
        # instead of ``assert`` because asserts are removed under ``python -O``
        if not title:
            raise ExtractorError('Unable to extract video title')

        quality = qualities(('4', '0', '1', '2', '3', '5', '6', '7'))

        # Tolerate a missing "videos" list and entries without a URL instead
        # of failing with KeyError (which would also hide the paid-video
        # message below)
        formats = []
        for video in metadata.get('videos') or []:
            video_url = video.get('url') if isinstance(video, dict) else None
            if not video_url:
                continue
            formats.append({
                'url': video_url,
                'ext': 'mp4',
                'format_id': video.get('name'),
            })

        m3u8_url = metadata.get('hlsManifestUrl')
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))

        dash_manifest = metadata.get('metadataEmbedded')
        if dash_manifest:
            formats.extend(self._parse_mpd_formats(
                compat_etree_fromstring(dash_manifest), 'mpd'))

        # Rank the formats gathered so far by the single-digit "type" value
        # embedded in their URL; live formats added below are left unranked
        for fmt in formats:
            fmt_type = self._search_regex(
                r'\btype[/=](\d)', fmt['url'],
                'format type', default=None)
            if fmt_type:
                fmt['quality'] = quality(fmt_type)

        # Live formats
        m3u8_url = metadata.get('hlsMasterPlaylistUrl')
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
        rtmp_url = metadata.get('rtmpUrl')
        if rtmp_url:
            formats.append({
                'url': rtmp_url,
                'format_id': 'rtmp',
                'ext': 'flv',
            })

        if not formats and metadata.get('paymentInfo'):
            self.raise_no_formats('This video is paid, subscribe to download it', expected=True)

        info['formats'] = formats
        return info

    def _extract_mobile(self, url):
        """Extract video information from the mobile (m.ok.ru) page.

        Reads the JSON stored in the page's ``data-video`` attribute and
        returns an info dict with a single ``mobile`` format.

        Raises ExtractorError when the page shows an error message, when the
        ``data-video`` attribute cannot be found, or when it holds no video
        URL.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            f'http://m.ok.ru/video/{video_id}', video_id,
            note='Downloading mobile webpage')

        error = self._search_regex(
            r'видео</a>\s*<div\s+class="empty">(.+?)</div>',
            webpage, 'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)

        json_data = self._search_regex(
            r'data-video="(.+?)"', webpage, 'json data')
        json_data = self._parse_json(unescapeHTML(json_data), video_id) or {}

        # Without a source URL there is nothing to download. Raising here
        # (instead of returning a format whose url is None) lets the caller's
        # ExtractorError handling report the desktop error instead
        video_url = json_data.get('videoSrc')
        if not video_url:
            raise ExtractorError('Unable to extract mobile video URL')

        return {
            'id': video_id,
            'title': json_data.get('videoName'),
            # videoDuration is divided by 1000 (scale=1000)
            'duration': float_or_none(json_data.get('videoDuration'), scale=1000),
            'thumbnail': json_data.get('videoPosterSrc'),
            'formats': [{
                'format_id': 'mobile',
                'url': video_url,
                'ext': 'mp4',
            }]
        }
Commit	Line	Data
4ffbf778	1	from .common import InfoExtractor
c9fd5306	2	from ..compat import (
1c35b3da	3	compat_etree_fromstring,
c9fd5306 S	4	compat_parse_qs,
	5	compat_urllib_parse_unquote,
	6	compat_urllib_parse_urlparse,
	7	)
4ffbf778	8	from ..utils import (
1806a754	9	ExtractorError,
d984a98d	10	float_or_none,
4ffbf778 S	11	int_or_none,
4ffbf778 S	12	qualities,
8196182a	13	smuggle_url,
b23b503e	14	traverse_obj,
372744c5	15	unescapeHTML,
8196182a	16	unified_strdate,
8196182a	17	unsmuggle_url,
a3474aa5	18	urlencode_postdata,
4ffbf778 S	19	)
	20
	21
	22	class OdnoklassnikiIE(InfoExtractor):
d04ca976 S	23	_VALID_URL = r'''(?x)
	24	https?://
	25	(?:(?:www\|m\|mobile)\.)?
	26	(?:odnoklassniki\|ok)\.ru/
	27	(?:
8196182a	28	video(?P<embed>embed)?/\|
d04ca976 S	29	web-api/video/moviePlayer/\|
	30	live/\|
	31	dk\?.*?st\.mvId=
	32	)
	33	(?P<id>[\d-]+)
	34	'''
bfd973ec	35	_EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:odnoklassniki\|ok)\.ru/videoembed/.+?)\1']
4ffbf778	36	_TESTS = [{
b8b3f456 K	37	'note': 'Coub embedded',
	38	'url': 'http://ok.ru/video/1484130554189',
	39	'info_dict': {
	40	'id': '1keok9',
	41	'ext': 'mp4',
	42	'timestamp': 1545580896,
	43	'view_count': int,
8196182a	44	'thumbnail': 'https://coub-attachments.akamaized.net/coub_storage/coub/simple/cw_image/c5ac87553bd/608e806a1239c210ab692/1545580913_00026.jpg',
b8b3f456 K	45	'title': 'Народная забава',
	46	'uploader': 'Nevata',
	47	'upload_date': '20181223',
	48	'age_limit': 0,
	49	'uploader_id': 'nevata.s',
	50	'like_count': int,
	51	'duration': 8.08,
	52	'repost_count': int,
	53	},
	54	}, {
	55	'note': 'vk.com embedded',
	56	'url': 'https://ok.ru/video/3568183087575',
	57	'info_dict': {
	58	'id': '-165101755_456243749',
	59	'ext': 'mp4',
	60	'uploader_id': '-165101755',
	61	'duration': 132,
	62	'timestamp': 1642869935,
	63	'upload_date': '20220122',
	64	'thumbnail': str,
	65	'title': str,
	66	'uploader': str,
	67	},
	68	}, {
c6bbdadd	69	# metadata in JSON
4ffbf778	70	'url': 'http://ok.ru/video/20079905452',
8196182a	71	'md5': '5d2b64756e2af296e3b383a0bc02a6aa',
4ffbf778 S	72	'info_dict': {
	73	'id': '20079905452',
	74	'ext': 'mp4',
	75	'title': 'Культура меняет нас (прекрасный ролик!))',
8196182a	76	'thumbnail': str,
4ffbf778	77	'duration': 100,
887e9bc7	78	'upload_date': '20141207',
4ffbf778 S	79	'uploader_id': '330537914540',
	80	'uploader': 'Виталий Добровольский',
	81	'like_count': int,
9f2e7c2f	82	'age_limit': 0,
c6bbdadd S	83	},
	84	}, {
	85	# metadataUrl
c9fd5306	86	'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
8196182a	87	'md5': 'f8c951122516af72e6e6ffdd3c41103b',
c6bbdadd S	88	'info_dict': {
	89	'id': '63567059965189-0',
	90	'ext': 'mp4',
	91	'title': 'Девушка без комплексов ...',
8196182a	92	'thumbnail': str,
c6bbdadd	93	'duration': 191,
887e9bc7	94	'upload_date': '20150518',
c6bbdadd	95	'uploader_id': '534380003155',
887e9bc7	96	'uploader': '☭ Андрей Мещанинов ☭',
c6bbdadd	97	'like_count': int,
9f2e7c2f	98	'age_limit': 0,
c9fd5306	99	'start_time': 5,
4ffbf778	100	},
88720ed0 S	101	}, {
88720ed0 S	102	# YouTube embed (metadataUrl, provider == USER_YOUTUBE)
8196182a	103	'url': 'https://ok.ru/video/3952212382174',
8196182a	104	'md5': '91749d0bd20763a28d083fa335bbd37a',
88720ed0	105	'info_dict': {
8196182a	106	'id': '5axVgHHDBvU',
88720ed0	107	'ext': 'mp4',
8196182a	108	'title': 'Youtube-dl 101: What is it and HOW to use it! Full Download Walkthrough and Guide',
	109	'description': 'md5:b57209eeb9d5c2f20c984dfb58862097',
	110	'uploader': 'Lod Mer',
	111	'uploader_id': '575186401502',
	112	'duration': 1529,
88720ed0	113	'age_limit': 0,
8196182a	114	'upload_date': '20210405',
	115	'comment_count': int,
	116	'live_status': 'not_live',
	117	'view_count': int,
	118	'thumbnail': 'https://i.mycdn.me/i?r=AEHujHvw2RjEbemUCNEorZbxYpb_p_9AcN2FmGik64Krkcmz37YtlY093oAM5-HIEAt7Zi9s0CiBOSDmbngC-I-k&fn=external_8',
	119	'uploader_url': 'http://www.youtube.com/user/MrKewlkid94',
	120	'channel_follower_count': int,
	121	'tags': ['youtube-dl', 'youtube playlists', 'download videos', 'download audio'],
	122	'channel_id': 'UCVGtvURtEURYHtJFUegdSug',
	123	'like_count': int,
	124	'availability': 'public',
	125	'channel_url': 'https://www.youtube.com/channel/UCVGtvURtEURYHtJFUegdSug',
	126	'categories': ['Education'],
	127	'playable_in_embed': True,
	128	'channel': 'BornToReact',
88720ed0	129	},
749b0046 S	130	}, {
	131	# YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
	132	'url': 'http://ok.ru/video/62036049272859-0',
	133	'info_dict': {
	134	'id': '62036049272859-0',
	135	'ext': 'mp4',
	136	'title': 'МУЗЫКА ДОЖДЯ .',
	137	'description': 'md5:6f1867132bd96e33bf53eda1091e8ed0',
	138	'upload_date': '20120106',
	139	'uploader_id': '473534735899',
	140	'uploader': 'МARINA D',
	141	'age_limit': 0,
	142	},
	143	'params': {
	144	'skip_download': True,
	145	},
58f6ab72	146	'skip': 'Video has not been found',
d984a98d	147	}, {
8196182a	148	# TODO: HTTP Error 400: Bad Request, it only works if there's no cookies when downloading
d984a98d THD	149	'note': 'Only available in mobile webpage',
	150	'url': 'https://m.ok.ru/video/2361249957145',
	151	'info_dict': {
	152	'id': '2361249957145',
8196182a	153	'ext': 'mp4',
d984a98d THD	154	'title': 'Быковское крещение',
	155	'duration': 3038.181,
	156	},
b23b503e	157	'skip': 'HTTP Error 400',
	158	}, {
	159	'note': 'subtitles',
	160	'url': 'https://ok.ru/video/4249587550747',
	161	'info_dict': {
	162	'id': '4249587550747',
	163	'ext': 'mp4',
	164	'title': 'Small Country An African Childhood (2020) (1080p) +subtitle',
	165	'uploader': 'Sunflower Movies',
	166	'uploader_id': '595802161179',
	167	'upload_date': '20220816',
	168	'duration': 6728,
	169	'age_limit': 0,
	170	'thumbnail': r're:^https?://i\.mycdn\.me/videoPreview\?.+',
	171	'like_count': int,
	172	'subtitles': dict,
	173	},
	174	'params': {
	175	'skip_download': True,
	176	},
4ffbf778 S	177	}, {
	178	'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
	179	'only_matching': True,
cdc8d0c3 YCH	180	}, {
	181	'url': 'http://www.ok.ru/video/20648036891',
	182	'only_matching': True,
d762f86e S	183	}, {
	184	'url': 'http://www.ok.ru/videoembed/20648036891',
	185	'only_matching': True,
10e6ed93 S	186	}, {
	187	'url': 'http://m.ok.ru/video/20079905452',
	188	'only_matching': True,
	189	}, {
	190	'url': 'http://mobile.ok.ru/video/20079905452',
	191	'only_matching': True,
8005dc68 S	192	}, {
	193	'url': 'https://www.ok.ru/live/484531969818',
	194	'only_matching': True,
608c738c G	195	}, {
	196	'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#',
	197	'only_matching': True,
15870747	198	}, {
	199	# Paid video
	200	'url': 'https://ok.ru/video/954886983203',
	201	'only_matching': True,
8196182a	202	}, {
	203	'url': 'https://ok.ru/videoembed/2932705602075',
	204	'info_dict': {
	205	'id': '2932705602075',
	206	'ext': 'mp4',
	207	'thumbnail': 'https://i.mycdn.me/videoPreview?id=1369902483995&type=37&idx=2&tkn=fqlnoQD_xwq5ovIlKfgNyU08qmM&fn=external_8',
	208	'title': 'Boosty для тебя!',
	209	'uploader_id': '597811038747',
	210	'like_count': 0,
	211	'duration': 35,
	212	},
	213	}]
	214
	215	_WEBPAGE_TESTS = [{
	216	'url': 'https://boosty.to/ikakprosto/posts/56cedaca-b56a-4dfd-b3ed-98c79cfa0167',
	217	'info_dict': {
	218	'id': '3950343629563',
	219	'ext': 'mp4',
	220	'thumbnail': 'https://i.mycdn.me/videoPreview?id=2776238394107&type=37&idx=11&tkn=F3ejkUFcpuI4DnMRxrDGcH5YcmM&fn=external_8',
	221	'title': 'Заяц Бусти.mp4',
	222	'uploader_id': '571368965883',
	223	'like_count': 0,
	224	'duration': 10444,
	225	},
b23b503e	226	'skip': 'Site no longer embeds',
4ffbf778 S	227	}]
4ffbf778 S	228
8196182a	229	@classmethod
	230	def _extract_embed_urls(cls, url, webpage):
	231	for x in super()._extract_embed_urls(url, webpage):
	232	yield smuggle_url(x, {'referrer': url})
	233
4ffbf778	234	def _real_extract(self, url):
d984a98d THD	235	try:
	236	return self._extract_desktop(url)
	237	except ExtractorError as e:
	238	try:
	239	return self._extract_mobile(url)
	240	except ExtractorError:
	241	# error message of desktop webpage is in English
	242	raise e
	243
	244	def _extract_desktop(self, url):
c9fd5306 S	245	start_time = int_or_none(compat_parse_qs(
	246	compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])
	247
8196182a	248	url, smuggled = unsmuggle_url(url, {})
	249	video_id, is_embed = self._match_valid_url(url).group('id', 'embed')
	250	mode = 'videoembed' if is_embed else 'video'
4ffbf778	251
ba2df04b	252	webpage = self._download_webpage(
8196182a	253	f'https://ok.ru/{mode}/{video_id}', video_id,
	254	note='Downloading desktop webpage',
	255	headers={'Referer': smuggled['referrer']} if smuggled.get('referrer') else {})
4ffbf778	256
1806a754 S	257	error = self._search_regex(
	258	r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
	259	webpage, 'error', default=None)
8196182a	260	# Direct link from boosty
	261	if (error == 'The author of this video has not been found or is blocked'
	262	and not smuggled.get('referrer') and mode == 'videoembed'):
	263	return self._extract_desktop(smuggle_url(url, {'referrer': 'https://boosty.to'}))
	264	elif error:
1806a754 S	265	raise ExtractorError(error, expected=True)
1806a754 S	266
4ffbf778	267	player = self._parse_json(
372744c5	268	unescapeHTML(self._search_regex(
1e804244 S	269	r'data-options=(?P<quote>["\'])(?P<player>{.+?%s.+?})(?P=quote)' % video_id,
1e804244 S	270	webpage, 'player', group='player')),
4ffbf778 S	271	video_id)
4ffbf778 S	272
b8b3f456 K	273	# embedded external player
	274	if player.get('isExternalPlayer') and player.get('url'):
	275	return self.url_result(player['url'])
	276
c6bbdadd S	277	flashvars = player['flashvars']
	278
	279	metadata = flashvars.get('metadata')
	280	if metadata:
	281	metadata = self._parse_json(metadata, video_id)
	282	else:
a3474aa5 RA	283	data = {}
	284	st_location = flashvars.get('location')
	285	if st_location:
	286	data['st.location'] = st_location
c6bbdadd	287	metadata = self._download_json(
b78f5ec4	288	compat_urllib_parse_unquote(flashvars['metadataUrl']),
a3474aa5 RA	289	video_id, 'Downloading metadata JSON',
a3474aa5 RA	290	data=urlencode_postdata(data))
4ffbf778 S	291
4ffbf778 S	292	movie = metadata['movie']
749b0046 S	293
	294	# Some embedded videos may not contain title in movie dict (e.g.
	295	# http://ok.ru/video/62036049272859-0) thus we allow missing title
	296	# here and it's going to be extracted later by an extractor that
	297	# will process the actual embed.
	298	provider = metadata.get('provider')
	299	title = movie['title'] if provider == 'UPLOADED_ODKL' else movie.get('title')
	300
4ffbf778 S	301	thumbnail = movie.get('poster')
	302	duration = int_or_none(movie.get('duration'))
	303
	304	author = metadata.get('author', {})
	305	uploader_id = author.get('id')
	306	uploader = author.get('name')
	307
	308	upload_date = unified_strdate(self._html_search_meta(
c6bbdadd	309	'ya:ovs:upload_date', webpage, 'upload date', default=None))
4ffbf778 S	310
	311	age_limit = None
	312	adult = self._html_search_meta(
c6bbdadd	313	'ya:ovs:adult', webpage, 'age limit', default=None)
4ffbf778 S	314	if adult:
	315	age_limit = 18 if adult == 'true' else 0
	316
	317	like_count = int_or_none(metadata.get('likeCount'))
	318
b23b503e	319	subtitles = {}
	320	for sub in traverse_obj(metadata, ('movie', 'subtitleTracks', ...), expected_type=dict):
	321	sub_url = sub.get('url')
	322	if not sub_url:
	323	continue
	324	subtitles.setdefault(sub.get('language') or 'en', []).append({
	325	'url': sub_url,
	326	'ext': 'vtt',
	327	})
	328
88720ed0	329	info = {
4ffbf778 S	330	'id': video_id,
	331	'title': title,
	332	'thumbnail': thumbnail,
	333	'duration': duration,
	334	'upload_date': upload_date,
	335	'uploader': uploader,
	336	'uploader_id': uploader_id,
	337	'like_count': like_count,
	338	'age_limit': age_limit,
c9fd5306	339	'start_time': start_time,
b23b503e	340	'subtitles': subtitles,
4ffbf778	341	}
88720ed0	342
b8b3f456 K	343	# pladform
	344	if provider == 'OPEN_GRAPH':
	345	info.update({
	346	'_type': 'url_transparent',
	347	'url': movie['contentId'],
	348	})
	349	return info
	350
749b0046	351	if provider == 'USER_YOUTUBE':
88720ed0 S	352	info.update({
	353	'_type': 'url_transparent',
	354	'url': movie['contentId'],
	355	})
	356	return info
	357
8005dc68 S	358	assert title
8005dc68 S	359	if provider == 'LIVE_TV_APP':
39ca3b5c	360	info['title'] = title
8005dc68	361
8196182a	362	quality = qualities(('4', '0', '1', '2', '3', '5', '6', '7'))
88720ed0 S	363
	364	formats = [{
	365	'url': f['url'],
	366	'ext': 'mp4',
	367	'format_id': f['name'],
88720ed0	368	} for f in metadata['videos']]
1c35b3da RA	369
	370	m3u8_url = metadata.get('hlsManifestUrl')
	371	if m3u8_url:
	372	formats.extend(self._extract_m3u8_formats(
	373	m3u8_url, video_id, 'mp4', 'm3u8_native',
	374	m3u8_id='hls', fatal=False))
	375
	376	dash_manifest = metadata.get('metadataEmbedded')
	377	if dash_manifest:
	378	formats.extend(self._parse_mpd_formats(
	379	compat_etree_fromstring(dash_manifest), 'mpd'))
	380
	381	for fmt in formats:
	382	fmt_type = self._search_regex(
	383	r'\btype[/=](\d)', fmt['url'],
	384	'format type', default=None)
	385	if fmt_type:
	386	fmt['quality'] = quality(fmt_type)
	387
8005dc68 S	388	# Live formats
	389	m3u8_url = metadata.get('hlsMasterPlaylistUrl')
	390	if m3u8_url:
	391	formats.extend(self._extract_m3u8_formats(
177877c5	392	m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
8005dc68 S	393	rtmp_url = metadata.get('rtmpUrl')
	394	if rtmp_url:
	395	formats.append({
	396	'url': rtmp_url,
	397	'format_id': 'rtmp',
	398	'ext': 'flv',
	399	})
	400
15870747	401	if not formats:
	402	payment_info = metadata.get('paymentInfo')
	403	if payment_info:
b7da73eb	404	self.raise_no_formats('This video is paid, subscribe to download it', expected=True)
15870747	405
88720ed0 S	406	info['formats'] = formats
88720ed0 S	407	return info
d984a98d THD	408
	409	def _extract_mobile(self, url):
	410	video_id = self._match_id(url)
	411
	412	webpage = self._download_webpage(
	413	'http://m.ok.ru/video/%s' % video_id, video_id,
	414	note='Downloading mobile webpage')
	415
	416	error = self._search_regex(
	417	r'видео</a>\s*<div\s+class="empty">(.+?)</div>',
	418	webpage, 'error', default=None)
	419	if error:
	420	raise ExtractorError(error, expected=True)
	421
	422	json_data = self._search_regex(
	423	r'data-video="(.+?)"', webpage, 'json data')
	424	json_data = self._parse_json(unescapeHTML(json_data), video_id) or {}
	425
	426	return {
	427	'id': video_id,
	428	'title': json_data.get('videoName'),
	429	'duration': float_or_none(json_data.get('videoDuration'), scale=1000),
	430	'thumbnail': json_data.get('videoPosterSrc'),
	431	'formats': [{
	432	'format_id': 'mobile',
	433	'url': json_data.get('videoSrc'),
	434	'ext': 'mp4',
	435	}]
	436	}