yt_dlp/extractor/sportdeutschland.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    clean_html,
    float_or_none,
    int_or_none,
    parse_iso8601,
    parse_qs,
    strip_or_none,
    try_get,
)
  14
  15
  16 class SportDeutschlandIE(InfoExtractor):
  17     _VALID_URL = r'https?://sportdeutschland\.tv/(?P<id>(?:[^/]+/)?[^?#/&]+)'
  18     _TESTS = [{
  19         'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
  20         'info_dict': {
  21             'id': '5318cac0275701382770543d7edaf0a0',
  22             'ext': 'mp4',
  23             'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals - Teil 1',
  24             'duration': 16106.36,
  25         },
  26         'params': {
  27             'noplaylist': True,
  28             # m3u8 download
  29             'skip_download': True,
  30         },
  31     }, {
  32         'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
  33         'info_dict': {
  34             'id': 'c6e2fdd01f63013854c47054d2ab776f',
  35             'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals',
  36             'description': 'md5:5263ff4c31c04bb780c9f91130b48530',
  37             'duration': 31397,
  38         },
  39         'playlist_count': 2,
  40     }, {
  41         'url': 'https://sportdeutschland.tv/freeride-world-tour-2021-fieberbrunn-oesterreich',
  42         'only_matching': True,
  43     }]
  44
  45     def _real_extract(self, url):
  46         display_id = self._match_id(url)
  47         data = self._download_json(
  48             'https://backend.sportdeutschland.tv/api/permalinks/' + display_id,
  49             display_id, query={'access_token': 'true'})
  50         asset = data['asset']
  51         title = (asset.get('title') or asset['label']).strip()
  52         asset_id = asset.get('id') or asset.get('uuid')
  53         info = {
  54             'id': asset_id,
  55             'title': title,
  56             'description': clean_html(asset.get('body') or asset.get('description')) or asset.get('teaser'),
  57             'duration': int_or_none(asset.get('seconds')),
  58         }
  59         videos = asset.get('videos') or []
  60         if len(videos) > 1:
  61             playlist_id = parse_qs(url).get('playlistId', [None])[0]
  62             if playlist_id:
  63                 if self.get_param('noplaylist'):
  64                     videos = [videos[int(playlist_id)]]
  65                     self.to_screen('Downloading just a single video because of --no-playlist')
  66                 else:
  67                     self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % asset_id)
  68
  69             def entries():
  70                 for i, video in enumerate(videos, 1):
  71                     video_id = video.get('uuid')
  72                     video_url = video.get('url')
  73                     if not (video_id and video_url):
  74                         continue
  75                     formats = self._extract_m3u8_formats(
  76                         video_url.replace('.smil', '.m3u8'), video_id, 'mp4', fatal=False)
  77                     if not formats and not self.get_param('ignore_no_formats'):
  78                         continue
  79                     yield {
  80                         'id': video_id,
  81                         'formats': formats,
  82                         'title': title + ' - ' + (video.get('label') or 'Teil %d' % i),
  83                         'duration': float_or_none(video.get('duration')),
  84                     }
  85             info.update({
  86                 '_type': 'multi_video',
  87                 'entries': entries(),
  88             })
  89         else:
  90             formats = self._extract_m3u8_formats(
  91                 videos[0]['url'].replace('.smil', '.m3u8'), asset_id, 'mp4')
  92             section_title = strip_or_none(try_get(data, lambda x: x['section']['title']))
  93             info.update({
  94                 'formats': formats,
  95                 'display_id': asset.get('permalink'),
  96                 'thumbnail': try_get(asset, lambda x: x['images'][0]),
  97                 'categories': [section_title] if section_title else None,
  98                 'view_count': int_or_none(asset.get('views')),
  99                 'is_live': asset.get('is_live') is True,
 100                 'timestamp': parse_iso8601(asset.get('date') or asset.get('published_at')),
 101             })
 102         return info