yt_dlp/extractor/hitbox.py

   1 import re
   2
   3 from .common import InfoExtractor
   4 from ..compat import compat_str
   5 from ..utils import (
   6     clean_html,
   7     determine_ext,
   8     float_or_none,
   9     int_or_none,
  10     parse_iso8601,
  11 )
  12
  13
  14 class HitboxIE(InfoExtractor):
  15     IE_NAME = 'hitbox'
  16     _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?:[^/]+/)*videos?/(?P<id>[0-9]+)'
  17     _TESTS = [{
  18         'url': 'http://www.hitbox.tv/video/203213',
  19         'info_dict': {
  20             'id': '203213',
  21             'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy',
  22             'alt_title': 'hitboxlive - Aug 9th #6',
  23             'description': '',
  24             'ext': 'mp4',
  25             'thumbnail': r're:^https?://.*\.jpg$',
  26             'duration': 215.1666,
  27             'resolution': 'HD 720p',
  28             'uploader': 'hitboxlive',
  29             'view_count': int,
  30             'timestamp': 1407576133,
  31             'upload_date': '20140809',
  32             'categories': ['Live Show'],
  33         },
  34         'params': {
  35             # m3u8 download
  36             'skip_download': True,
  37         },
  38     }, {
  39         'url': 'https://www.smashcast.tv/hitboxlive/videos/203213',
  40         'only_matching': True,
  41     }]
  42
  43     def _extract_metadata(self, url, video_id):
  44         thumb_base = 'https://edge.sf.hitbox.tv'
  45         metadata = self._download_json(
  46             '%s/%s' % (url, video_id), video_id, 'Downloading metadata JSON')
  47
  48         date = 'media_live_since'
  49         media_type = 'livestream'
  50         if metadata.get('media_type') == 'video':
  51             media_type = 'video'
  52             date = 'media_date_added'
  53
  54         video_meta = metadata.get(media_type, [])[0]
  55         title = video_meta.get('media_status')
  56         alt_title = video_meta.get('media_title')
  57         description = clean_html(
  58             video_meta.get('media_description')
  59             or video_meta.get('media_description_md'))
  60         duration = float_or_none(video_meta.get('media_duration'))
  61         uploader = video_meta.get('media_user_name')
  62         views = int_or_none(video_meta.get('media_views'))
  63         timestamp = parse_iso8601(video_meta.get(date), ' ')
  64         categories = [video_meta.get('category_name')]
  65         thumbs = [{
  66             'url': thumb_base + video_meta.get('media_thumbnail'),
  67             'width': 320,
  68             'height': 180
  69         }, {
  70             'url': thumb_base + video_meta.get('media_thumbnail_large'),
  71             'width': 768,
  72             'height': 432
  73         }]
  74
  75         return {
  76             'id': video_id,
  77             'title': title,
  78             'alt_title': alt_title,
  79             'description': description,
  80             'ext': 'mp4',
  81             'thumbnails': thumbs,
  82             'duration': duration,
  83             'uploader': uploader,
  84             'view_count': views,
  85             'timestamp': timestamp,
  86             'categories': categories,
  87         }
  88
  89     def _real_extract(self, url):
  90         video_id = self._match_id(url)
  91
  92         player_config = self._download_json(
  93             'https://www.smashcast.tv/api/player/config/video/%s' % video_id,
  94             video_id, 'Downloading video JSON')
  95
  96         formats = []
  97         for video in player_config['clip']['bitrates']:
  98             label = video.get('label')
  99             if label == 'Auto':
 100                 continue
 101             video_url = video.get('url')
 102             if not video_url:
 103                 continue
 104             bitrate = int_or_none(video.get('bitrate'))
 105             if determine_ext(video_url) == 'm3u8':
 106                 if not video_url.startswith('http'):
 107                     continue
 108                 formats.append({
 109                     'url': video_url,
 110                     'ext': 'mp4',
 111                     'tbr': bitrate,
 112                     'format_note': label,
 113                     'protocol': 'm3u8_native',
 114                 })
 115             else:
 116                 formats.append({
 117                     'url': video_url,
 118                     'tbr': bitrate,
 119                     'format_note': label,
 120                 })
 121
 122         metadata = self._extract_metadata(
 123             'https://www.smashcast.tv/api/media/video', video_id)
 124         metadata['formats'] = formats
 125
 126         return metadata
 127
 128
 129 class HitboxLiveIE(HitboxIE):  # XXX: Do not subclass from concrete IE
 130     IE_NAME = 'hitbox:live'
 131     _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?P<id>[^/?#&]+)'
 132     _TESTS = [{
 133         'url': 'http://www.hitbox.tv/dimak',
 134         'info_dict': {
 135             'id': 'dimak',
 136             'ext': 'mp4',
 137             'description': 'md5:c9f80fa4410bc588d7faa40003fc7d0e',
 138             'timestamp': int,
 139             'upload_date': compat_str,
 140             'title': compat_str,
 141             'uploader': 'Dimak',
 142         },
 143         'params': {
 144             # live
 145             'skip_download': True,
 146         },
 147     }, {
 148         'url': 'https://www.smashcast.tv/dimak',
 149         'only_matching': True,
 150     }]
 151
 152     @classmethod
 153     def suitable(cls, url):
 154         return False if HitboxIE.suitable(url) else super(HitboxLiveIE, cls).suitable(url)
 155
 156     def _real_extract(self, url):
 157         video_id = self._match_id(url)
 158
 159         player_config = self._download_json(
 160             'https://www.smashcast.tv/api/player/config/live/%s' % video_id,
 161             video_id)
 162
 163         formats = []
 164         cdns = player_config.get('cdns')
 165         servers = []
 166         for cdn in cdns:
 167             # Subscribe URLs are not playable
 168             if cdn.get('rtmpSubscribe') is True:
 169                 continue
 170             base_url = cdn.get('netConnectionUrl')
 171             host = re.search(r'.+\.([^\.]+\.[^\./]+)/.+', base_url).group(1)
 172             if base_url not in servers:
 173                 servers.append(base_url)
 174                 for stream in cdn.get('bitrates'):
 175                     label = stream.get('label')
 176                     if label == 'Auto':
 177                         continue
 178                     stream_url = stream.get('url')
 179                     if not stream_url:
 180                         continue
 181                     bitrate = int_or_none(stream.get('bitrate'))
 182                     if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8':
 183                         if not stream_url.startswith('http'):
 184                             continue
 185                         formats.append({
 186                             'url': stream_url,
 187                             'ext': 'mp4',
 188                             'tbr': bitrate,
 189                             'format_note': label,
 190                             'rtmp_live': True,
 191                         })
 192                     else:
 193                         formats.append({
 194                             'url': '%s/%s' % (base_url, stream_url),
 195                             'ext': 'mp4',
 196                             'tbr': bitrate,
 197                             'rtmp_live': True,
 198                             'format_note': host,
 199                             'page_url': url,
 200                             'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
 201                         })
 202
 203         metadata = self._extract_metadata(
 204             'https://www.smashcast.tv/api/media/live', video_id)
 205         metadata['formats'] = formats
 206         metadata['is_live'] = True
 207         metadata['title'] = metadata.get('title')
 208
 209         return metadata