yt_dlp/extractor/dctp.py

   1 from .common import InfoExtractor
   2 from ..utils import (
   3     float_or_none,
   4     int_or_none,
   5     unified_timestamp,
   6     url_or_none,
   7 )
   8
   9
  10 class DctpTvIE(InfoExtractor):
  11     _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
  12     _TESTS = [{
  13         # 4x3
  14         'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
  15         'md5': '3ffbd1556c3fe210724d7088fad723e3',
  16         'info_dict': {
  17             'id': '95eaa4f33dad413aa17b4ee613cccc6c',
  18             'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
  19             'ext': 'm4v',
  20             'title': 'Videoinstallation für eine Kaufhausfassade',
  21             'description': 'Kurzfilm',
  22             'thumbnail': r're:^https?://.*\.jpg$',
  23             'duration': 71.24,
  24             'timestamp': 1302172322,
  25             'upload_date': '20110407',
  26         },
  27     }, {
  28         # 16x9
  29         'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/',
  30         'only_matching': True,
  31     }]
  32
  33     _BASE_URL = 'http://dctp-ivms2-restapi.s3.amazonaws.com'
  34
  35     def _real_extract(self, url):
  36         display_id = self._match_id(url)
  37
  38         version = self._download_json(
  39             f'{self._BASE_URL}/version.json', display_id,
  40             'Downloading version JSON')
  41
  42         restapi_base = '{}/{}/restapi'.format(
  43             self._BASE_URL, version['version_name'])
  44
  45         info = self._download_json(
  46             f'{restapi_base}/slugs/{display_id}.json', display_id,
  47             'Downloading video info JSON')
  48
  49         media = self._download_json(
  50             '{}/media/{}.json'.format(restapi_base, str(info['object_id'])),
  51             display_id, 'Downloading media JSON')
  52
  53         uuid = media['uuid']
  54         title = media['title']
  55         is_wide = media.get('is_wide')
  56         formats = []
  57
  58         def add_formats(suffix):
  59             templ = f'https://%s/{uuid}_dctp_{suffix}.m4v'
  60             formats.extend([{
  61                 'format_id': 'hls-' + suffix,
  62                 'url': templ % 'cdn-segments.dctp.tv' + '/playlist.m3u8',
  63                 'protocol': 'm3u8_native',
  64             }, {
  65                 'format_id': 's3-' + suffix,
  66                 'url': templ % 'completed-media.s3.amazonaws.com',
  67             }, {
  68                 'format_id': 'http-' + suffix,
  69                 'url': templ % 'cdn-media.dctp.tv',
  70             }])
  71
  72         add_formats('0500_' + ('16x9' if is_wide else '4x3'))
  73         if is_wide:
  74             add_formats('720p')
  75
  76         thumbnails = []
  77         images = media.get('images')
  78         if isinstance(images, list):
  79             for image in images:
  80                 if not isinstance(image, dict):
  81                     continue
  82                 image_url = url_or_none(image.get('url'))
  83                 if not image_url:
  84                     continue
  85                 thumbnails.append({
  86                     'url': image_url,
  87                     'width': int_or_none(image.get('width')),
  88                     'height': int_or_none(image.get('height')),
  89                 })
  90
  91         return {
  92             'id': uuid,
  93             'display_id': display_id,
  94             'title': title,
  95             'alt_title': media.get('subtitle'),
  96             'description': media.get('description') or media.get('teaser'),
  97             'timestamp': unified_timestamp(media.get('created')),
  98             'duration': float_or_none(media.get('duration_in_ms'), scale=1000),
  99             'thumbnails': thumbnails,
 100             'formats': formats,
 101         }