[yt-dlp.git] / yt_dlp / extractor / kinopoisk.py

from .common import InfoExtractor
from ..utils import (
    dict_get,
    int_or_none,
)


class KinoPoiskIE(InfoExtractor):
    """Extractor for film pages under ``kinopoisk.ru/film/<id>``.

    The numeric film id is taken from the URL, the OTT widget page for that
    id is downloaded, and both the metadata and the HLS playlist URL are read
    from the JSON document embedded in that page.
    """

    # NOTE(review): presumably consumed by the InfoExtractor base class for
    # geo-restriction handling; the base class is not visible here.
    _GEO_COUNTRIES = ['RU']
    _VALID_URL = r'https?://(?:www\.)?kinopoisk\.ru/film/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.kinopoisk.ru/film/81041/watch/',
        'md5': '4f71c80baea10dfa54a837a46111d326',
        'info_dict': {
            'id': '81041',
            'ext': 'mp4',
            'title': 'Алеша попович и тугарин змей',
            'description': 'md5:43787e673d68b805d0aa1df5a5aea701',
            'thumbnail': r're:^https?://.*',
            'duration': 4533,
            'age_limit': 12,
        },
    }, {
        'url': 'https://www.kinopoisk.ru/film/81041',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the film referenced by *url*.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnail``, ``duration``, ``age_limit`` and ``formats``.

        Raises ``KeyError`` when the embedded JSON has no ``models``,
        ``filmStatus`` or ``playlistEntity``/``uri`` entry, or when ``title``
        is empty and ``originalTitle`` is absent. Failures inside the
        base-class download/parse helpers propagate unchanged.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'https://ott-widget.kinopoisk.ru/v1/kp/', video_id,
            query={'kpId': video_id})

        # Capture everything up to the closing </script> tag rather than the
        # first '<': a '<' inside a JSON string (e.g. markup in a
        # description) would otherwise truncate the payload and make the
        # parse fail.
        data = self._parse_json(
            self._search_regex(
                r'(?s)<script[^>]+\btype=["\']application/json[^>]+>(.+?)</script>',
                webpage, 'data'),
            video_id)['models']

        film = data['filmStatus']
        # Prefer the 'title' entry; fall back to 'originalTitle' when it is
        # missing or empty.
        title = film.get('title') or film['originalTitle']

        formats = self._extract_m3u8_formats(
            data['playlistEntity']['uri'], video_id, 'mp4',
            entry_protocol='m3u8_native', m3u8_id='hls')

        # NOTE(review): the misspelled '...Descriptscription' keys are kept
        # exactly as found; confirm against a live response whether the API
        # really emits them before removing either spelling.
        description = dict_get(
            film, ('descriptscription', 'description',
                   'shortDescriptscription', 'shortDescription'))
        thumbnail = film.get('coverUrl') or film.get('posterUrl')
        duration = int_or_none(film.get('duration'))
        age_limit = int_or_none(film.get('restrictionAge'))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'age_limit': age_limit,
            'formats': formats,
        }
Commit	Line	Data
df4d817b S	1	from .common import InfoExtractor
	2	from ..utils import (
	3	dict_get,
	4	int_or_none,
	5	)
	6
	7
	8	class KinoPoiskIE(InfoExtractor):
	9	_GEO_COUNTRIES = ['RU']
	10	_VALID_URL = r'https?://(?:www\.)?kinopoisk\.ru/film/(?P<id>\d+)'
	11	_TESTS = [{
	12	'url': 'https://www.kinopoisk.ru/film/81041/watch/',
	13	'md5': '4f71c80baea10dfa54a837a46111d326',
	14	'info_dict': {
	15	'id': '81041',
	16	'ext': 'mp4',
	17	'title': 'Алеша попович и тугарин змей',
	18	'description': 'md5:43787e673d68b805d0aa1df5a5aea701',
	19	'thumbnail': r're:^https?://.*',
	20	'duration': 4533,
	21	'age_limit': 12,
	22	},
df4d817b S	23	}, {
	24	'url': 'https://www.kinopoisk.ru/film/81041',
	25	'only_matching': True,
	26	}]
	27
	28	def _real_extract(self, url):
	29	video_id = self._match_id(url)
	30
	31	webpage = self._download_webpage(
	32	'https://ott-widget.kinopoisk.ru/v1/kp/', video_id,
	33	query={'kpId': video_id})
	34
	35	data = self._parse_json(
	36	self._search_regex(
	37	r'(?s)<script[^>]+\btype=["\']application/json[^>]+>(.+?)<',
	38	webpage, 'data'),
	39	video_id)['models']
	40
	41	film = data['filmStatus']
	42	title = film.get('title') or film['originalTitle']
	43
	44	formats = self._extract_m3u8_formats(
	45	data['playlistEntity']['uri'], video_id, 'mp4',
	46	entry_protocol='m3u8_native', m3u8_id='hls')
df4d817b S	47
	48	description = dict_get(
	49	film, ('descriptscription', 'description',
	50	'shortDescriptscription', 'shortDescription'))
	51	thumbnail = film.get('coverUrl') or film.get('posterUrl')
	52	duration = int_or_none(film.get('duration'))
	53	age_limit = int_or_none(film.get('restrictionAge'))
	54
	55	return {
	56	'id': video_id,
	57	'title': title,
	58	'description': description,
	59	'thumbnail': thumbnail,
	60	'duration': duration,
	61	'age_limit': age_limit,
	62	'formats': formats,
	63	}