yt_dlp/extractor/duoplay.py

   1 from .common import InfoExtractor
   2 from ..utils import (
   3     ExtractorError,
   4     extract_attributes,
   5     get_element_text_and_html_by_tag,
   6     int_or_none,
   7     join_nonempty,
   8     str_or_none,
   9     try_call,
  10     unified_timestamp,
  11 )
  12 from ..utils.traversal import traverse_obj
  13
  14
  15 class DuoplayIE(InfoExtractor):
  16     _VALID_URL = r'https://duoplay\.ee/(?P<id>\d+)/[\w-]+/?(?:\?(?:[^#]+&)?ep=(?P<ep>\d+))?'
  17     _TESTS = [{
  18         'note': 'Siberi võmm S02E12',
  19         'url': 'https://duoplay.ee/4312/siberi-vomm?ep=24',
  20         'md5': '1ff59d535310ac9c5cf5f287d8f91b2d',
  21         'info_dict': {
  22             'id': '4312_24',
  23             'ext': 'mp4',
  24             'title': 'Operatsioon "Öö"',
  25             'thumbnail': r're:https://.+\.jpg(?:\?c=\d+)?$',
  26             'description': 'md5:8ef98f38569d6b8b78f3d350ccc6ade8',
  27             'upload_date': '20170523',
  28             'timestamp': 1495567800,
  29             'series': 'Siberi võmm',
  30             'series_id': '4312',
  31             'season': 'Season 2',
  32             'season_number': 2,
  33             'episode': 'Operatsioon "Öö"',
  34             'episode_number': 12,
  35             'episode_id': 24,
  36         },
  37     }, {
  38         'note': 'Empty title',
  39         'url': 'https://duoplay.ee/17/uhikarotid?ep=14',
  40         'md5': '6aca68be71112314738dd17cced7f8bf',
  41         'info_dict': {
  42             'id': '17_14',
  43             'ext': 'mp4',
  44             'title': 'Ühikarotid',
  45             'thumbnail': r're:https://.+\.jpg(?:\?c=\d+)?$',
  46             'description': 'md5:4719b418e058c209def41d48b601276e',
  47             'upload_date': '20100916',
  48             'timestamp': 1284661800,
  49             'series': 'Ühikarotid',
  50             'series_id': '17',
  51             'season': 'Season 2',
  52             'season_number': 2,
  53             'episode_id': 14,
  54             'release_year': 2010,
  55         },
  56     }, {
  57         'note': 'Movie without expiry',
  58         'url': 'https://duoplay.ee/5501/pilvede-all.-neljas-ode',
  59         'md5': '7abf63d773a49ef7c39f2c127842b8fd',
  60         'info_dict': {
  61             'id': '5501',
  62             'ext': 'mp4',
  63             'title': 'Pilvede all. Neljas õde',
  64             'thumbnail': r're:https://.+\.jpg(?:\?c=\d+)?$',
  65             'description': 'md5:d86a70f8f31e82c369d4d4f4c79b1279',
  66             'cast': 'count:9',
  67             'upload_date': '20221214',
  68             'timestamp': 1671054000,
  69             'release_year': 2018,
  70         },
  71     }]
  72
  73     def _real_extract(self, url):
  74         telecast_id, episode = self._match_valid_url(url).group('id', 'ep')
  75         video_id = join_nonempty(telecast_id, episode, delim='_')
  76         webpage = self._download_webpage(url, video_id)
  77         video_player = try_call(lambda: extract_attributes(
  78             get_element_text_and_html_by_tag('video-player', webpage)[1]))
  79         if not video_player or not video_player.get('manifest-url'):
  80             raise ExtractorError('No video found', expected=True)
  81
  82         episode_attr = self._parse_json(video_player.get(':episode') or '', video_id, fatal=False) or {}
  83
  84         return {
  85             'id': video_id,
  86             'formats': self._extract_m3u8_formats(video_player['manifest-url'], video_id, 'mp4'),
  87             **traverse_obj(episode_attr, {
  88                 'title': 'title',
  89                 'description': 'synopsis',
  90                 'thumbnail': ('images', 'original'),
  91                 'timestamp': ('airtime', {lambda x: unified_timestamp(x + ' +0200')}),
  92                 'cast': ('cast', {lambda x: x.split(', ')}),
  93                 'release_year': ('year', {int_or_none}),
  94             }),
  95             **(traverse_obj(episode_attr, {
  96                 'title': (None, ('subtitle', ('episode_nr', {lambda x: f'Episode {x}' if x else None}))),
  97                 'series': 'title',
  98                 'series_id': ('telecast_id', {str_or_none}),
  99                 'season_number': ('season_id', {int_or_none}),
 100                 'episode': 'subtitle',
 101                 'episode_number': ('episode_nr', {int_or_none}),
 102                 'episode_id': ('episode_id', {int_or_none}),
 103             }, get_all=False) if episode_attr.get('category') != 'movies' else {}),
 104         }