yt_dlp/extractor/kinopoisk.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     dict_get,
   7     int_or_none,
   8 )
   9
  10
  11 class KinoPoiskIE(InfoExtractor):
  12     _GEO_COUNTRIES = ['RU']
  13     _VALID_URL = r'https?://(?:www\.)?kinopoisk\.ru/film/(?P<id>\d+)'
  14     _TESTS = [{
  15         'url': 'https://www.kinopoisk.ru/film/81041/watch/',
  16         'md5': '4f71c80baea10dfa54a837a46111d326',
  17         'info_dict': {
  18             'id': '81041',
  19             'ext': 'mp4',
  20             'title': 'Алеша попович и тугарин змей',
  21             'description': 'md5:43787e673d68b805d0aa1df5a5aea701',
  22             'thumbnail': r're:^https?://.*',
  23             'duration': 4533,
  24             'age_limit': 12,
  25         },
  26     }, {
  27         'url': 'https://www.kinopoisk.ru/film/81041',
  28         'only_matching': True,
  29     }]
  30
  31     def _real_extract(self, url):
  32         video_id = self._match_id(url)
  33
  34         webpage = self._download_webpage(
  35             'https://ott-widget.kinopoisk.ru/v1/kp/', video_id,
  36             query={'kpId': video_id})
  37
  38         data = self._parse_json(
  39             self._search_regex(
  40                 r'(?s)<script[^>]+\btype=["\']application/json[^>]+>(.+?)<',
  41                 webpage, 'data'),
  42             video_id)['models']
  43
  44         film = data['filmStatus']
  45         title = film.get('title') or film['originalTitle']
  46
  47         formats = self._extract_m3u8_formats(
  48             data['playlistEntity']['uri'], video_id, 'mp4',
  49             entry_protocol='m3u8_native', m3u8_id='hls')
  50         self._sort_formats(formats)
  51
  52         description = dict_get(
  53             film, ('descriptscription', 'description',
  54                    'shortDescriptscription', 'shortDescription'))
  55         thumbnail = film.get('coverUrl') or film.get('posterUrl')
  56         duration = int_or_none(film.get('duration'))
  57         age_limit = int_or_none(film.get('restrictionAge'))
  58
  59         return {
  60             'id': video_id,
  61             'title': title,
  62             'description': description,
  63             'thumbnail': thumbnail,
  64             'duration': duration,
  65             'age_limit': age_limit,
  66             'formats': formats,
  67         }