yt_dlp/extractor/theta.py

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    try_get,
)
   3
   4
   5 class ThetaStreamIE(InfoExtractor):
   6     _VALID_URL = r'https?://(?:www\.)?theta\.tv/(?!video/)(?P<id>[a-z0-9-]+)'
   7     _TESTS = [{
   8         'url': 'https://www.theta.tv/davirus',
   9         'skip': 'The live may have ended',
  10         'info_dict': {
  11             'id': 'DaVirus',
  12             'ext': 'mp4',
  13             'title': 'I choose you - My Community is King -👀 - YO HABLO ESPANOL - CODE DAVIRUS',
  14             'thumbnail': r're:https://live-thumbnails-prod-theta-tv\.imgix\.net/thumbnail/.+\.jpg',
  15         }
  16     }, {
  17         'url': 'https://www.theta.tv/mst3k',
  18         'note': 'This channel is live 24/7',
  19         'info_dict': {
  20             'id': 'MST3K',
  21             'ext': 'mp4',
  22             'title': 'Mystery Science Theatre 3000 24/7 Powered by the THETA Network.',
  23             'thumbnail': r're:https://user-prod-theta-tv\.imgix\.net/.+\.jpg',
  24         }
  25     }, {
  26         'url': 'https://www.theta.tv/contv-anime',
  27         'info_dict': {
  28             'id': 'ConTVAnime',
  29             'ext': 'mp4',
  30             'title': 'CONTV ANIME 24/7. Powered by THETA Network.',
  31             'thumbnail': r're:https://user-prod-theta-tv\.imgix\.net/.+\.jpg',
  32         }
  33     }]
  34
  35     def _real_extract(self, url):
  36         channel_id = self._match_id(url)
  37         info = self._download_json(f'https://api.theta.tv/v1/channel?alias={channel_id}', channel_id)['body']
  38
  39         m3u8_playlist = next(
  40             data['url'] for data in info['live_stream']['video_urls']
  41             if data.get('type') != 'embed' and data.get('resolution') in ('master', 'source'))
  42
  43         formats = self._extract_m3u8_formats(m3u8_playlist, channel_id, 'mp4', m3u8_id='hls', live=True)
  44         self._sort_formats(formats)
  45
  46         channel = try_get(info, lambda x: x['user']['username'])  # using this field instead of channel_id due to capitalization
  47
  48         return {
  49             'id': channel,
  50             'title': try_get(info, lambda x: x['live_stream']['title']),
  51             'channel': channel,
  52             'view_count': try_get(info, lambda x: x['live_stream']['view_count']),
  53             'is_live': True,
  54             'formats': formats,
  55             'thumbnail': try_get(info, lambda x: x['live_stream']['thumbnail_url']),
  56         }
  57
  58
  59 class ThetaVideoIE(InfoExtractor):
  60     _VALID_URL = r'https?://(?:www\.)?theta\.tv/video/(?P<id>vid[a-z0-9]+)'
  61     _TEST = {
  62         'url': 'https://www.theta.tv/video/vidiq6aaet3kzf799p0',
  63         'md5': '633d8c29eb276bb38a111dbd591c677f',
  64         'info_dict': {
  65             'id': 'vidiq6aaet3kzf799p0',
  66             'ext': 'mp4',
  67             'title': 'Theta EdgeCast Tutorial',
  68             'uploader': 'Pixiekittie',
  69             'description': 'md5:e316253f5bdced8b5a46bb50ae60a09f',
  70             'thumbnail': r're:https://user-prod-theta-tv\.imgix\.net/.+/vod_thumb/.+.jpg',
  71         }
  72     }
  73
  74     def _real_extract(self, url):
  75         video_id = self._match_id(url)
  76         info = self._download_json(f'https://api.theta.tv/v1/video/{video_id}/raw', video_id)['body']
  77
  78         m3u8_playlist = try_get(info, lambda x: x['video_urls'][0]['url'])
  79
  80         formats = self._extract_m3u8_formats(m3u8_playlist, video_id, 'mp4', m3u8_id='hls')
  81         self._sort_formats(formats)
  82
  83         return {
  84             'id': video_id,
  85             'title': info.get('title'),
  86             'uploader': try_get(info, lambda x: x['user']['username']),
  87             'description': info.get('description'),
  88             'view_count': info.get('view_count'),
  89             'like_count': info.get('like_count'),
  90             'formats': formats,
  91             'thumbnail': info.get('thumbnail_url'),
  92         }