[yt-dlp.git] / yt_dlp / extractor / odnoklassniki.py

import urllib.parse

from .common import InfoExtractor
from ..compat import compat_etree_fromstring
from ..networking import HEADRequest
from ..utils import (
    ExtractorError,
    float_or_none,
    int_or_none,
    qualities,
    smuggle_url,
    traverse_obj,
    unescapeHTML,
    unified_strdate,
    unsmuggle_url,
    url_or_none,
    urlencode_postdata,
)


class OdnoklassnikiIE(InfoExtractor):
    """Information extractor for videos on ok.ru / odnoklassniki.ru."""

    # Accepts video, videoembed, web-api moviePlayer, live and "dk?...st.mvId="
    # URLs on the bare host as well as the www., m. and mobile. subdomains.
    # The "embed" group is only set for /videoembed/ URLs; "id" is the run of
    # digits and dashes that follows the matched path prefix.
    _VALID_URL = r'''(?x)
                https?://
                    (?:(?:www|m|mobile)\.)?
                    (?:odnoklassniki|ok)\.ru/
                    (?:
                        video(?P<embed>embed)?/|
                        web-api/video/moviePlayer/|
                        live/|
                        dk\?.*?st\.mvId=
                    )
                    (?P<id>[\d-]+)
                '''
    # Finds <iframe> tags whose src points at a /videoembed/ page
    # (scheme optional, bare odnoklassniki.ru / ok.ru host only).
    _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1']
    _TESTS = [{
        'note': 'Coub embedded',
        'url': 'http://ok.ru/video/1484130554189',
        'info_dict': {
            'id': '1keok9',
            'ext': 'mp4',
            'timestamp': 1545580896,
            'view_count': int,
            'thumbnail': r're:^https?://.*\.jpg$',
            'title': 'Народная забава',
            'uploader': 'Nevata',
            'upload_date': '20181223',
            'age_limit': 0,
            'uploader_id': 'nevata.s',
            'like_count': int,
            'duration': 8.08,
            'repost_count': int,
        },
    }, {
        'note': 'vk.com embedded',
        'url': 'https://ok.ru/video/3568183087575',
        'info_dict': {
            'id': '-165101755_456243749',
            'ext': 'mp4',
            'uploader_id': '-165101755',
            'duration': 132,
            'timestamp': 1642869935,
            'upload_date': '20220122',
            'thumbnail': str,
            'title': str,
            'uploader': str,
        },
        'skip': 'vk extractor error',
    }, {
        # metadata in JSON, webm_dash with Firefox UA
        'url': 'http://ok.ru/video/20079905452',
        'md5': '8f477d8931c531374a3e36daec617b2c',
        'info_dict': {
            'id': '20079905452',
            'ext': 'webm',
            'title': 'Культура меняет нас (прекрасный ролик!))',
            'thumbnail': str,
            'duration': 100,
            'upload_date': '20141207',
            'uploader_id': '330537914540',
            'uploader': 'Виталий Добровольский',
            'like_count': int,
            'age_limit': 0,
        },
        'params': {
            'format': 'bv[ext=webm]',
            'http_headers': {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0'},
        },
    }, {
        # metadataUrl
        'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
        'md5': '2bae2f58eefe1b3d26f3926c4a64d2f3',
        'info_dict': {
            'id': '63567059965189-0',
            'ext': 'mp4',
            'title': 'Девушка без комплексов ...',
            'thumbnail': str,
            'duration': 191,
            'upload_date': '20150518',
            'uploader_id': '534380003155',
            'uploader': '☭ Андрей Мещанинов ☭',
            'like_count': int,
            'age_limit': 0,
            'start_time': 5,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # YouTube embed (metadataUrl, provider == USER_YOUTUBE)
        'url': 'https://ok.ru/video/3952212382174',
        'md5': '5fb5f83ce16cb212d6bf887282b5da53',
        'info_dict': {
            'id': '5axVgHHDBvU',
            'ext': 'mp4',
            'title': 'Youtube-dl 101: What is it and HOW to use it! Full Download Walkthrough and Guide',
            'description': 'md5:b57209eeb9d5c2f20c984dfb58862097',
            'uploader': 'Lod Mer',
            'uploader_id': '575186401502',
            'duration': 1529,
            'age_limit': 0,
            'upload_date': '20210405',
            'comment_count': int,
            'live_status': 'not_live',
            'view_count': int,
            'thumbnail': 'https://i.mycdn.me/i?r=AEHujHvw2RjEbemUCNEorZbxYpb_p_9AcN2FmGik64Krkcmz37YtlY093oAM5-HIEAt7Zi9s0CiBOSDmbngC-I-k&fn=external_8',
            'uploader_url': 'https://www.youtube.com/@MrKewlkid94',
            'channel_follower_count': int,
            'tags': ['youtube-dl', 'youtube playlists', 'download videos', 'download audio'],
            'channel_id': 'UCVGtvURtEURYHtJFUegdSug',
            'like_count': int,
            'availability': 'public',
            'channel_url': 'https://www.youtube.com/channel/UCVGtvURtEURYHtJFUegdSug',
            'categories': ['Education'],
            'playable_in_embed': True,
            'channel': 'BornToReact',
        },
    }, {
        # YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
        'url': 'http://ok.ru/video/62036049272859-0',
        'info_dict': {
            'id': '62036049272859-0',
            'ext': 'mp4',
            'title': 'МУЗЫКА     ДОЖДЯ .',
            'description': 'md5:6f1867132bd96e33bf53eda1091e8ed0',
            'upload_date': '20120106',
            'uploader_id': '473534735899',
            'uploader': 'МARINA D',
            'age_limit': 0,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Video has not been found',
    }, {
        'note': 'Only available in mobile webpage',
        'url': 'https://m.ok.ru/video/2361249957145',
        'info_dict': {
            'id': '2361249957145',
            'ext': 'mp4',
            'title': 'Быковское крещение',
            'duration': 3038.181,
            'thumbnail': r're:^https?://i\.mycdn\.me/videoPreview\?.+',
        },
    }, {
        'note': 'subtitles',
        'url': 'https://ok.ru/video/4249587550747',
        'info_dict': {
            'id': '4249587550747',
            'ext': 'mp4',
            'title': 'Small Country An African Childhood (2020) (1080p) +subtitle',
            'uploader': 'Sunflower Movies',
            'uploader_id': '595802161179',
            'upload_date': '20220816',
            'duration': 6728,
            'age_limit': 0,
            'thumbnail': r're:^https?://i\.mycdn\.me/videoPreview\?.+',
            'like_count': int,
            'subtitles': dict,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
        'only_matching': True,
    }, {
        'url': 'http://www.ok.ru/video/20648036891',
        'only_matching': True,
    }, {
        'url': 'http://www.ok.ru/videoembed/20648036891',
        'only_matching': True,
    }, {
        'url': 'http://m.ok.ru/video/20079905452',
        'only_matching': True,
    }, {
        'url': 'http://mobile.ok.ru/video/20079905452',
        'only_matching': True,
    }, {
        'url': 'https://www.ok.ru/live/484531969818',
        'only_matching': True,
    }, {
        'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#',
        'only_matching': True,
    }, {
        # Paid video
        'url': 'https://ok.ru/video/954886983203',
        'only_matching': True,
    }, {
        'url': 'https://ok.ru/videoembed/2932705602075',
        'info_dict': {
            'id': '2932705602075',
            'ext': 'mp4',
            'thumbnail': 'https://i.mycdn.me/videoPreview?id=1369902483995&type=37&idx=2&tkn=fqlnoQD_xwq5ovIlKfgNyU08qmM&fn=external_8',
            'title': 'Boosty для тебя!',
            'uploader_id': '597811038747',
            'like_count': 0,
            'duration': 35,
        },
    }]

    _WEBPAGE_TESTS = [{
        'url': 'https://boosty.to/ikakprosto/posts/56cedaca-b56a-4dfd-b3ed-98c79cfa0167',
        'info_dict': {
            'id': '3950343629563',
            'ext': 'mp4',
            'thumbnail': 'https://i.mycdn.me/videoPreview?id=2776238394107&type=37&idx=11&tkn=F3ejkUFcpuI4DnMRxrDGcH5YcmM&fn=external_8',
            'title': 'Заяц Бусти.mp4',
            'uploader_id': '571368965883',
            'like_count': 0,
            'duration': 10444,
        },
        'skip': 'Site no longer embeds',
    }]

    def _clear_cookies(self, cdn_url):
        # Direct http downloads will fail if CDN cookies are set
        # so we need to reset them after each format extraction
        self.cookiejar.clear(domain='.mycdn.me')
        self.cookiejar.clear(domain=urllib.parse.urlparse(cdn_url).hostname)

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield the embed URLs found in ``webpage``, each smuggled with a referrer.

        The URL of the embedding page (``url``) is attached to every embed URL
        under the 'referrer' key so that it can be read back after unsmuggling.
        """
        embed_urls = super()._extract_embed_urls(url, webpage)
        # A fresh dict is built per URL on purpose; do not share one instance
        yield from (smuggle_url(embed_url, {'referrer': url}) for embed_url in embed_urls)

    def _real_extract(self, url):
        try:
            return self._extract_desktop(url)
        except ExtractorError as e:
            try:
                return self._extract_mobile(url)
            except ExtractorError:
                # error message of desktop webpage is in English
                raise e

    def _extract_desktop(self, url):
        """Extract a video from the desktop page (ok.ru/video or ok.ru/videoembed).

        ``url`` may carry smuggled data; a smuggled 'referrer' is sent as the
        Referer header when the page is downloaded.

        Returns an info dict. For an external player this is a plain URL
        result; for the OPEN_GRAPH and USER_YOUTUBE providers it is a
        'url_transparent' result pointing at ``movie['contentId']``; otherwise
        it carries the collected 'formats'.

        Raises ExtractorError when the page shows an error stub or when no
        title is available for a video that is extracted here. A paid video
        without formats is reported through ``raise_no_formats``.
        """
        # Read the optional fromTime query parameter before the URL is unsmuggled
        start_time = int_or_none(urllib.parse.parse_qs(
            urllib.parse.urlparse(url).query).get('fromTime', [None])[0])

        url, smuggled = unsmuggle_url(url, {})
        video_id, is_embed = self._match_valid_url(url).group('id', 'embed')
        mode = 'videoembed' if is_embed else 'video'
        referrer = smuggled.get('referrer')

        webpage = self._download_webpage(
            f'https://ok.ru/{mode}/{video_id}', video_id,
            note='Downloading desktop webpage',
            headers={'Referer': referrer} if referrer else {})

        error = self._search_regex(
            r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
            webpage, 'error', default=None)
        # Direct link from boosty: an embed requested without any referrer is
        # retried once with a boosty.to referrer before giving up
        if (error == 'The author of this video has not been found or is blocked'
                and not referrer and mode == 'videoembed'):
            return self._extract_desktop(smuggle_url(url, {'referrer': 'https://boosty.to'}))
        elif error:
            raise ExtractorError(error, expected=True)

        player = self._parse_json(
            unescapeHTML(self._search_regex(
                rf'data-options=(?P<quote>["\'])(?P<player>{{.+?{video_id}.+?}})(?P=quote)',
                webpage, 'player', group='player')),
            video_id)

        # embedded external player
        if player.get('isExternalPlayer') and player.get('url'):
            return self.url_result(player['url'])

        flashvars = player['flashvars']

        # Metadata is either inlined as a JSON string or has to be fetched
        # with a POST request to metadataUrl
        metadata = flashvars.get('metadata')
        if metadata:
            metadata = self._parse_json(metadata, video_id)
        else:
            data = {}
            st_location = flashvars.get('location')
            if st_location:
                data['st.location'] = st_location
            metadata = self._download_json(
                urllib.parse.unquote(flashvars['metadataUrl']),
                video_id, 'Downloading metadata JSON',
                data=urlencode_postdata(data))

        movie = metadata['movie']

        # Some embedded videos may not contain title in movie dict (e.g.
        # http://ok.ru/video/62036049272859-0) thus we allow missing title
        # here and it's going to be extracted later by an extractor that
        # will process the actual embed.
        provider = metadata.get('provider')
        title = movie['title'] if provider == 'UPLOADED_ODKL' else movie.get('title')

        # "author" may be present but null, so do not rely on the .get() default
        author = metadata.get('author') or {}

        upload_date = unified_strdate(self._html_search_meta(
            'ya:ovs:upload_date', webpage, 'upload date', default=None))

        age_limit = None
        adult = self._html_search_meta(
            'ya:ovs:adult', webpage, 'age limit', default=None)
        if adult:
            age_limit = 18 if adult == 'true' else 0

        subtitles = {}
        for sub in traverse_obj(metadata, ('movie', 'subtitleTracks', ...), expected_type=dict):
            sub_url = sub.get('url')
            if not sub_url:
                continue
            # Tracks without a language are filed under 'en'
            subtitles.setdefault(sub.get('language') or 'en', []).append({
                'url': sub_url,
                'ext': 'vtt',
            })

        info = {
            'id': video_id,
            'title': title,
            'thumbnail': movie.get('poster'),
            'duration': int_or_none(movie.get('duration')),
            'upload_date': upload_date,
            'uploader': author.get('name'),
            'uploader_id': author.get('id'),
            'like_count': int_or_none(metadata.get('likeCount')),
            'age_limit': age_limit,
            'start_time': start_time,
            'subtitles': subtitles,
        }

        # pladform (OPEN_GRAPH) and YouTube embeds: hand the content URL over
        # to whichever extractor handles it, keeping the metadata found here
        if provider in ('OPEN_GRAPH', 'USER_YOUTUBE'):
            info.update({
                '_type': 'url_transparent',
                'url': movie['contentId'],
            })
            return info

        # From here on the video is extracted by this extractor, so a title
        # is required; raise explicitly instead of relying on an assert
        if not title:
            raise ExtractorError('Unable to extract title')

        # Order of the digit found in the "type" URL parameter, worst to best
        quality = qualities(('4', '0', '1', '2', '3', '5', '6', '7'))

        formats = [{
            'url': f['url'],
            'ext': 'mp4',
            'format_id': f.get('name'),
        } for f in traverse_obj(metadata, ('videos', lambda _, v: url_or_none(v['url'])))]

        m3u8_url = traverse_obj(metadata, 'hlsManifestUrl', 'ondemandHls')
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))
            self._clear_cookies(m3u8_url)

        for mpd_id, mpd_key in [('dash', 'ondemandDash'), ('webm', 'metadataWebmUrl')]:
            mpd_url = metadata.get(mpd_key)
            if mpd_url:
                formats.extend(self._extract_mpd_formats(
                    mpd_url, video_id, mpd_id=mpd_id, fatal=False))
                self._clear_cookies(mpd_url)

        # DASH manifest delivered inline as an XML string
        dash_manifest = metadata.get('metadataEmbedded')
        if dash_manifest:
            formats.extend(self._parse_mpd_formats(
                compat_etree_fromstring(dash_manifest), 'mpd'))

        # Rank the formats collected so far; the live formats added below
        # are deliberately left without a quality value
        for fmt in formats:
            fmt_type = self._search_regex(
                r'\btype[/=](\d)', fmt['url'],
                'format type', default=None)
            if fmt_type:
                fmt['quality'] = quality(fmt_type)

        # Live formats
        m3u8_url = metadata.get('hlsMasterPlaylistUrl')
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
            self._clear_cookies(m3u8_url)
        rtmp_url = metadata.get('rtmpUrl')
        if rtmp_url:
            formats.append({
                'url': rtmp_url,
                'format_id': 'rtmp',
                'ext': 'flv',
            })

        if not formats and metadata.get('paymentInfo'):
            self.raise_no_formats('This video is paid, subscribe to download it', expected=True)

        info['formats'] = formats
        return info

    def _extract_mobile(self, url):
        """Extract a video from the mobile page (m.ok.ru/video/<id>).

        The page's ``data-video`` attribute holds JSON with the video name,
        duration, poster and source URL. The source URL is resolved with a
        HEAD request and the final URL becomes the single 'mobile' format.

        Raises ExtractorError when the page shows an error message.
        """
        video_id = self._match_id(url)

        mobile_page = self._download_webpage(
            f'http://m.ok.ru/video/{video_id}', video_id,
            note='Downloading mobile webpage')

        page_error = self._search_regex(
            r'видео</a>\s*<div\s+class="empty">(.+?)</div>',
            mobile_page, 'error', default=None)
        if page_error:
            raise ExtractorError(page_error, expected=True)

        raw_video_data = self._search_regex(
            r'data-video="(.+?)"', mobile_page, 'json data')
        video_data = self._parse_json(unescapeHTML(raw_video_data), video_id) or {}

        # Follow redirects with a HEAD request to obtain the final download URL
        head_response = self._request_webpage(
            HEADRequest(video_data['videoSrc']), video_id, 'Requesting download URL')
        download_url = head_response.url
        self._clear_cookies(download_url)

        mobile_format = {
            'format_id': 'mobile',
            'url': download_url,
            'ext': 'mp4',
        }
        return {
            'id': video_id,
            'title': video_data.get('videoName'),
            # videoDuration is divided by 1000 to get the reported duration
            'duration': float_or_none(video_data.get('videoDuration'), scale=1000),
            'thumbnail': video_data.get('videoPosterSrc'),
            'formats': [mobile_format],
        }
Commit	Line	Data
1a2eb5bd	1	import urllib.parse
1a2eb5bd	2
4ffbf778	3	from .common import InfoExtractor
add96eb9	4	from ..compat import compat_etree_fromstring
3d2623a8	5	from ..networking import HEADRequest
4ffbf778	6	from ..utils import (
1806a754	7	ExtractorError,
d984a98d	8	float_or_none,
4ffbf778 S	9	int_or_none,
4ffbf778 S	10	qualities,
8196182a	11	smuggle_url,
b23b503e	12	traverse_obj,
372744c5	13	unescapeHTML,
8196182a	14	unified_strdate,
8196182a	15	unsmuggle_url,
1a2eb5bd	16	url_or_none,
a3474aa5	17	urlencode_postdata,
4ffbf778 S	18	)
	19
	20
	21	class OdnoklassnikiIE(InfoExtractor):
d04ca976 S	22	_VALID_URL = r'''(?x)
	23	https?://
	24	(?:(?:www\|m\|mobile)\.)?
	25	(?:odnoklassniki\|ok)\.ru/
	26	(?:
8196182a	27	video(?P<embed>embed)?/\|
d04ca976 S	28	web-api/video/moviePlayer/\|
	29	live/\|
	30	dk\?.*?st\.mvId=
	31	)
	32	(?P<id>[\d-]+)
	33	'''
bfd973ec	34	_EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:odnoklassniki\|ok)\.ru/videoembed/.+?)\1']
4ffbf778	35	_TESTS = [{
b8b3f456 K	36	'note': 'Coub embedded',
	37	'url': 'http://ok.ru/video/1484130554189',
	38	'info_dict': {
	39	'id': '1keok9',
	40	'ext': 'mp4',
	41	'timestamp': 1545580896,
	42	'view_count': int,
1a2eb5bd	43	'thumbnail': r're:^https?://.*\.jpg$',
b8b3f456 K	44	'title': 'Народная забава',
	45	'uploader': 'Nevata',
	46	'upload_date': '20181223',
	47	'age_limit': 0,
	48	'uploader_id': 'nevata.s',
	49	'like_count': int,
	50	'duration': 8.08,
	51	'repost_count': int,
	52	},
	53	}, {
	54	'note': 'vk.com embedded',
	55	'url': 'https://ok.ru/video/3568183087575',
	56	'info_dict': {
	57	'id': '-165101755_456243749',
	58	'ext': 'mp4',
	59	'uploader_id': '-165101755',
	60	'duration': 132,
	61	'timestamp': 1642869935,
	62	'upload_date': '20220122',
	63	'thumbnail': str,
	64	'title': str,
	65	'uploader': str,
	66	},
1a2eb5bd	67	'skip': 'vk extractor error',
b8b3f456	68	}, {
1a2eb5bd	69	# metadata in JSON, webm_dash with Firefox UA
4ffbf778	70	'url': 'http://ok.ru/video/20079905452',
1a2eb5bd	71	'md5': '8f477d8931c531374a3e36daec617b2c',
4ffbf778 S	72	'info_dict': {
4ffbf778 S	73	'id': '20079905452',
1a2eb5bd	74	'ext': 'webm',
4ffbf778	75	'title': 'Культура меняет нас (прекрасный ролик!))',
8196182a	76	'thumbnail': str,
4ffbf778	77	'duration': 100,
887e9bc7	78	'upload_date': '20141207',
4ffbf778 S	79	'uploader_id': '330537914540',
	80	'uploader': 'Виталий Добровольский',
	81	'like_count': int,
9f2e7c2f	82	'age_limit': 0,
c6bbdadd	83	},
1a2eb5bd	84	'params': {
	85	'format': 'bv[ext=webm]',
	86	'http_headers': {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0'},
	87	},
c6bbdadd S	88	}, {
c6bbdadd S	89	# metadataUrl
c9fd5306	90	'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
1a2eb5bd	91	'md5': '2bae2f58eefe1b3d26f3926c4a64d2f3',
c6bbdadd S	92	'info_dict': {
	93	'id': '63567059965189-0',
	94	'ext': 'mp4',
	95	'title': 'Девушка без комплексов ...',
8196182a	96	'thumbnail': str,
c6bbdadd	97	'duration': 191,
887e9bc7	98	'upload_date': '20150518',
c6bbdadd	99	'uploader_id': '534380003155',
887e9bc7	100	'uploader': '☭ Андрей Мещанинов ☭',
c6bbdadd	101	'like_count': int,
9f2e7c2f	102	'age_limit': 0,
c9fd5306	103	'start_time': 5,
4ffbf778	104	},
1a2eb5bd	105	'params': {'skip_download': 'm3u8'},
88720ed0 S	106	}, {
88720ed0 S	107	# YouTube embed (metadataUrl, provider == USER_YOUTUBE)
8196182a	108	'url': 'https://ok.ru/video/3952212382174',
1a2eb5bd	109	'md5': '5fb5f83ce16cb212d6bf887282b5da53',
88720ed0	110	'info_dict': {
8196182a	111	'id': '5axVgHHDBvU',
88720ed0	112	'ext': 'mp4',
8196182a	113	'title': 'Youtube-dl 101: What is it and HOW to use it! Full Download Walkthrough and Guide',
	114	'description': 'md5:b57209eeb9d5c2f20c984dfb58862097',
	115	'uploader': 'Lod Mer',
	116	'uploader_id': '575186401502',
	117	'duration': 1529,
88720ed0	118	'age_limit': 0,
8196182a	119	'upload_date': '20210405',
	120	'comment_count': int,
	121	'live_status': 'not_live',
	122	'view_count': int,
	123	'thumbnail': 'https://i.mycdn.me/i?r=AEHujHvw2RjEbemUCNEorZbxYpb_p_9AcN2FmGik64Krkcmz37YtlY093oAM5-HIEAt7Zi9s0CiBOSDmbngC-I-k&fn=external_8',
1a2eb5bd	124	'uploader_url': 'https://www.youtube.com/@MrKewlkid94',
8196182a	125	'channel_follower_count': int,
	126	'tags': ['youtube-dl', 'youtube playlists', 'download videos', 'download audio'],
	127	'channel_id': 'UCVGtvURtEURYHtJFUegdSug',
	128	'like_count': int,
	129	'availability': 'public',
	130	'channel_url': 'https://www.youtube.com/channel/UCVGtvURtEURYHtJFUegdSug',
	131	'categories': ['Education'],
	132	'playable_in_embed': True,
	133	'channel': 'BornToReact',
88720ed0	134	},
749b0046 S	135	}, {
	136	# YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
	137	'url': 'http://ok.ru/video/62036049272859-0',
	138	'info_dict': {
	139	'id': '62036049272859-0',
	140	'ext': 'mp4',
	141	'title': 'МУЗЫКА ДОЖДЯ .',
	142	'description': 'md5:6f1867132bd96e33bf53eda1091e8ed0',
	143	'upload_date': '20120106',
	144	'uploader_id': '473534735899',
	145	'uploader': 'МARINA D',
	146	'age_limit': 0,
	147	},
	148	'params': {
	149	'skip_download': True,
	150	},
58f6ab72	151	'skip': 'Video has not been found',
d984a98d THD	152	}, {
	153	'note': 'Only available in mobile webpage',
	154	'url': 'https://m.ok.ru/video/2361249957145',
	155	'info_dict': {
	156	'id': '2361249957145',
8196182a	157	'ext': 'mp4',
d984a98d THD	158	'title': 'Быковское крещение',
d984a98d THD	159	'duration': 3038.181,
1a2eb5bd	160	'thumbnail': r're:^https?://i\.mycdn\.me/videoPreview\?.+',
d984a98d	161	},
b23b503e	162	}, {
	163	'note': 'subtitles',
	164	'url': 'https://ok.ru/video/4249587550747',
	165	'info_dict': {
	166	'id': '4249587550747',
	167	'ext': 'mp4',
	168	'title': 'Small Country An African Childhood (2020) (1080p) +subtitle',
	169	'uploader': 'Sunflower Movies',
	170	'uploader_id': '595802161179',
	171	'upload_date': '20220816',
	172	'duration': 6728,
	173	'age_limit': 0,
	174	'thumbnail': r're:^https?://i\.mycdn\.me/videoPreview\?.+',
	175	'like_count': int,
	176	'subtitles': dict,
	177	},
	178	'params': {
	179	'skip_download': True,
	180	},
4ffbf778 S	181	}, {
	182	'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
	183	'only_matching': True,
cdc8d0c3 YCH	184	}, {
	185	'url': 'http://www.ok.ru/video/20648036891',
	186	'only_matching': True,
d762f86e S	187	}, {
	188	'url': 'http://www.ok.ru/videoembed/20648036891',
	189	'only_matching': True,
10e6ed93 S	190	}, {
	191	'url': 'http://m.ok.ru/video/20079905452',
	192	'only_matching': True,
	193	}, {
	194	'url': 'http://mobile.ok.ru/video/20079905452',
	195	'only_matching': True,
8005dc68 S	196	}, {
	197	'url': 'https://www.ok.ru/live/484531969818',
	198	'only_matching': True,
608c738c G	199	}, {
	200	'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#',
	201	'only_matching': True,
15870747	202	}, {
	203	# Paid video
	204	'url': 'https://ok.ru/video/954886983203',
	205	'only_matching': True,
8196182a	206	}, {
	207	'url': 'https://ok.ru/videoembed/2932705602075',
	208	'info_dict': {
	209	'id': '2932705602075',
	210	'ext': 'mp4',
	211	'thumbnail': 'https://i.mycdn.me/videoPreview?id=1369902483995&type=37&idx=2&tkn=fqlnoQD_xwq5ovIlKfgNyU08qmM&fn=external_8',
	212	'title': 'Boosty для тебя!',
	213	'uploader_id': '597811038747',
	214	'like_count': 0,
	215	'duration': 35,
	216	},
	217	}]
	218
	219	_WEBPAGE_TESTS = [{
	220	'url': 'https://boosty.to/ikakprosto/posts/56cedaca-b56a-4dfd-b3ed-98c79cfa0167',
	221	'info_dict': {
	222	'id': '3950343629563',
	223	'ext': 'mp4',
	224	'thumbnail': 'https://i.mycdn.me/videoPreview?id=2776238394107&type=37&idx=11&tkn=F3ejkUFcpuI4DnMRxrDGcH5YcmM&fn=external_8',
	225	'title': 'Заяц Бусти.mp4',
	226	'uploader_id': '571368965883',
	227	'like_count': 0,
	228	'duration': 10444,
	229	},
b23b503e	230	'skip': 'Site no longer embeds',
4ffbf778 S	231	}]
4ffbf778 S	232
1a2eb5bd	233	def _clear_cookies(self, cdn_url):
	234	# Direct http downloads will fail if CDN cookies are set
	235	# so we need to reset them after each format extraction
ad54c913	236	self.cookiejar.clear(domain='.mycdn.me')
ad54c913	237	self.cookiejar.clear(domain=urllib.parse.urlparse(cdn_url).hostname)
1a2eb5bd	238
8196182a	239	@classmethod
	240	def _extract_embed_urls(cls, url, webpage):
	241	for x in super()._extract_embed_urls(url, webpage):
	242	yield smuggle_url(x, {'referrer': url})
	243
4ffbf778	244	def _real_extract(self, url):
d984a98d THD	245	try:
	246	return self._extract_desktop(url)
	247	except ExtractorError as e:
	248	try:
	249	return self._extract_mobile(url)
	250	except ExtractorError:
	251	# error message of desktop webpage is in English
	252	raise e
	253
	254	def _extract_desktop(self, url):
add96eb9	255	start_time = int_or_none(urllib.parse.parse_qs(
add96eb9	256	urllib.parse.urlparse(url).query).get('fromTime', [None])[0])
c9fd5306	257
8196182a	258	url, smuggled = unsmuggle_url(url, {})
	259	video_id, is_embed = self._match_valid_url(url).group('id', 'embed')
	260	mode = 'videoembed' if is_embed else 'video'
4ffbf778	261
ba2df04b	262	webpage = self._download_webpage(
8196182a	263	f'https://ok.ru/{mode}/{video_id}', video_id,
	264	note='Downloading desktop webpage',
	265	headers={'Referer': smuggled['referrer']} if smuggled.get('referrer') else {})
4ffbf778	266
1806a754 S	267	error = self._search_regex(
	268	r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
	269	webpage, 'error', default=None)
8196182a	270	# Direct link from boosty
	271	if (error == 'The author of this video has not been found or is blocked'
	272	and not smuggled.get('referrer') and mode == 'videoembed'):
	273	return self._extract_desktop(smuggle_url(url, {'referrer': 'https://boosty.to'}))
	274	elif error:
1806a754 S	275	raise ExtractorError(error, expected=True)
1806a754 S	276
4ffbf778	277	player = self._parse_json(
372744c5	278	unescapeHTML(self._search_regex(
add96eb9	279	rf'data-options=(?P<quote>["\'])(?P<player>{{.+?{video_id}.+?}})(?P=quote)',
1e804244	280	webpage, 'player', group='player')),
4ffbf778 S	281	video_id)
4ffbf778 S	282
b8b3f456 K	283	# embedded external player
	284	if player.get('isExternalPlayer') and player.get('url'):
	285	return self.url_result(player['url'])
	286
c6bbdadd S	287	flashvars = player['flashvars']
	288
	289	metadata = flashvars.get('metadata')
	290	if metadata:
	291	metadata = self._parse_json(metadata, video_id)
	292	else:
a3474aa5 RA	293	data = {}
	294	st_location = flashvars.get('location')
	295	if st_location:
	296	data['st.location'] = st_location
c6bbdadd	297	metadata = self._download_json(
add96eb9	298	urllib.parse.unquote(flashvars['metadataUrl']),
a3474aa5 RA	299	video_id, 'Downloading metadata JSON',
a3474aa5 RA	300	data=urlencode_postdata(data))
4ffbf778 S	301
4ffbf778 S	302	movie = metadata['movie']
749b0046 S	303
	304	# Some embedded videos may not contain title in movie dict (e.g.
	305	# http://ok.ru/video/62036049272859-0) thus we allow missing title
	306	# here and it's going to be extracted later by an extractor that
	307	# will process the actual embed.
	308	provider = metadata.get('provider')
	309	title = movie['title'] if provider == 'UPLOADED_ODKL' else movie.get('title')
	310
4ffbf778 S	311	thumbnail = movie.get('poster')
	312	duration = int_or_none(movie.get('duration'))
	313
	314	author = metadata.get('author', {})
	315	uploader_id = author.get('id')
	316	uploader = author.get('name')
	317
	318	upload_date = unified_strdate(self._html_search_meta(
c6bbdadd	319	'ya:ovs:upload_date', webpage, 'upload date', default=None))
4ffbf778 S	320
	321	age_limit = None
	322	adult = self._html_search_meta(
c6bbdadd	323	'ya:ovs:adult', webpage, 'age limit', default=None)
4ffbf778 S	324	if adult:
	325	age_limit = 18 if adult == 'true' else 0
	326
	327	like_count = int_or_none(metadata.get('likeCount'))
	328
b23b503e	329	subtitles = {}
	330	for sub in traverse_obj(metadata, ('movie', 'subtitleTracks', ...), expected_type=dict):
	331	sub_url = sub.get('url')
	332	if not sub_url:
	333	continue
	334	subtitles.setdefault(sub.get('language') or 'en', []).append({
	335	'url': sub_url,
	336	'ext': 'vtt',
	337	})
	338
88720ed0	339	info = {
4ffbf778 S	340	'id': video_id,
	341	'title': title,
	342	'thumbnail': thumbnail,
	343	'duration': duration,
	344	'upload_date': upload_date,
	345	'uploader': uploader,
	346	'uploader_id': uploader_id,
	347	'like_count': like_count,
	348	'age_limit': age_limit,
c9fd5306	349	'start_time': start_time,
b23b503e	350	'subtitles': subtitles,
4ffbf778	351	}
88720ed0	352
b8b3f456 K	353	# pladform
	354	if provider == 'OPEN_GRAPH':
	355	info.update({
	356	'_type': 'url_transparent',
	357	'url': movie['contentId'],
	358	})
	359	return info
	360
749b0046	361	if provider == 'USER_YOUTUBE':
88720ed0 S	362	info.update({
	363	'_type': 'url_transparent',
	364	'url': movie['contentId'],
	365	})
	366	return info
	367
8005dc68 S	368	assert title
8005dc68 S	369	if provider == 'LIVE_TV_APP':
39ca3b5c	370	info['title'] = title
8005dc68	371
8196182a	372	quality = qualities(('4', '0', '1', '2', '3', '5', '6', '7'))
88720ed0 S	373
	374	formats = [{
	375	'url': f['url'],
	376	'ext': 'mp4',
1a2eb5bd	377	'format_id': f.get('name'),
1a2eb5bd	378	} for f in traverse_obj(metadata, ('videos', lambda _, v: url_or_none(v['url'])))]
1c35b3da	379
1a2eb5bd	380	m3u8_url = traverse_obj(metadata, 'hlsManifestUrl', 'ondemandHls')
1c35b3da RA	381	if m3u8_url:
	382	formats.extend(self._extract_m3u8_formats(
	383	m3u8_url, video_id, 'mp4', 'm3u8_native',
	384	m3u8_id='hls', fatal=False))
1a2eb5bd	385	self._clear_cookies(m3u8_url)
	386
	387	for mpd_id, mpd_key in [('dash', 'ondemandDash'), ('webm', 'metadataWebmUrl')]:
	388	mpd_url = metadata.get(mpd_key)
	389	if mpd_url:
	390	formats.extend(self._extract_mpd_formats(
	391	mpd_url, video_id, mpd_id=mpd_id, fatal=False))
	392	self._clear_cookies(mpd_url)
1c35b3da RA	393
	394	dash_manifest = metadata.get('metadataEmbedded')
	395	if dash_manifest:
	396	formats.extend(self._parse_mpd_formats(
	397	compat_etree_fromstring(dash_manifest), 'mpd'))
	398
	399	for fmt in formats:
	400	fmt_type = self._search_regex(
	401	r'\btype[/=](\d)', fmt['url'],
	402	'format type', default=None)
	403	if fmt_type:
	404	fmt['quality'] = quality(fmt_type)
	405
8005dc68 S	406	# Live formats
	407	m3u8_url = metadata.get('hlsMasterPlaylistUrl')
	408	if m3u8_url:
	409	formats.extend(self._extract_m3u8_formats(
177877c5	410	m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
1a2eb5bd	411	self._clear_cookies(m3u8_url)
8005dc68 S	412	rtmp_url = metadata.get('rtmpUrl')
	413	if rtmp_url:
	414	formats.append({
	415	'url': rtmp_url,
	416	'format_id': 'rtmp',
	417	'ext': 'flv',
	418	})
	419
15870747	420	if not formats:
	421	payment_info = metadata.get('paymentInfo')
	422	if payment_info:
b7da73eb	423	self.raise_no_formats('This video is paid, subscribe to download it', expected=True)
15870747	424
88720ed0 S	425	info['formats'] = formats
88720ed0 S	426	return info
d984a98d THD	427
	428	def _extract_mobile(self, url):
	429	video_id = self._match_id(url)
	430
	431	webpage = self._download_webpage(
add96eb9	432	f'http://m.ok.ru/video/{video_id}', video_id,
d984a98d THD	433	note='Downloading mobile webpage')
	434
	435	error = self._search_regex(
	436	r'видео</a>\s*<div\s+class="empty">(.+?)</div>',
	437	webpage, 'error', default=None)
	438	if error:
	439	raise ExtractorError(error, expected=True)
	440
	441	json_data = self._search_regex(
	442	r'data-video="(.+?)"', webpage, 'json data')
	443	json_data = self._parse_json(unescapeHTML(json_data), video_id) or {}
	444
1a2eb5bd	445	redirect_url = self._request_webpage(HEADRequest(
3d2623a8	446	json_data['videoSrc']), video_id, 'Requesting download URL').url
1a2eb5bd	447	self._clear_cookies(redirect_url)
1a2eb5bd	448
d984a98d THD	449	return {
	450	'id': video_id,
	451	'title': json_data.get('videoName'),
	452	'duration': float_or_none(json_data.get('videoDuration'), scale=1000),
	453	'thumbnail': json_data.get('videoPosterSrc'),
	454	'formats': [{
	455	'format_id': 'mobile',
1a2eb5bd	456	'url': redirect_url,
d984a98d	457	'ext': 'mp4',
add96eb9	458	}],
d984a98d	459	}