yt_dlp/extractor/theta.py

from .common import InfoExtractor
from ..utils import ExtractorError, try_get
   3
   4
   5 class ThetaStreamIE(InfoExtractor):
   6     _VALID_URL = r'https?://(?:www\.)?theta\.tv/(?!video/)(?P<id>[a-z0-9-]+)'
   7     _TESTS = [{
   8         'url': 'https://www.theta.tv/davirus',
   9         'skip': 'The live may have ended',
  10         'info_dict': {
  11             'id': 'DaVirus',
  12             'ext': 'mp4',
  13             'title': 'I choose you - My Community is King -👀 - YO HABLO ESPANOL - CODE DAVIRUS',
  14             'thumbnail': r're:https://live-thumbnails-prod-theta-tv\.imgix\.net/thumbnail/.+\.jpg',
  15         }
  16     }, {
  17         'url': 'https://www.theta.tv/mst3k',
  18         'note': 'This channel is live 24/7',
  19         'info_dict': {
  20             'id': 'MST3K',
  21             'ext': 'mp4',
  22             'title': 'Mystery Science Theatre 3000 24/7 Powered by the THETA Network.',
  23             'thumbnail': r're:https://user-prod-theta-tv\.imgix\.net/.+\.jpg',
  24         }
  25     }, {
  26         'url': 'https://www.theta.tv/contv-anime',
  27         'info_dict': {
  28             'id': 'ConTVAnime',
  29             'ext': 'mp4',
  30             'title': 'CONTV ANIME 24/7. Powered by THETA Network.',
  31             'thumbnail': r're:https://user-prod-theta-tv\.imgix\.net/.+\.jpg',
  32         }
  33     }]
  34
  35     def _real_extract(self, url):
  36         channel_id = self._match_id(url)
  37         info = self._download_json(f'https://api.theta.tv/v1/channel?alias={channel_id}', channel_id)['body']
  38
  39         m3u8_playlist = next(
  40             data['url'] for data in info['live_stream']['video_urls']
  41             if data.get('type') != 'embed' and data.get('resolution') in ('master', 'source'))
  42
  43         formats = self._extract_m3u8_formats(m3u8_playlist, channel_id, 'mp4', m3u8_id='hls', live=True)
  44
  45         channel = try_get(info, lambda x: x['user']['username'])  # using this field instead of channel_id due to capitalization
  46
  47         return {
  48             'id': channel,
  49             'title': try_get(info, lambda x: x['live_stream']['title']),
  50             'channel': channel,
  51             'view_count': try_get(info, lambda x: x['live_stream']['view_count']),
  52             'is_live': True,
  53             'formats': formats,
  54             'thumbnail': try_get(info, lambda x: x['live_stream']['thumbnail_url']),
  55         }
  56
  57
  58 class ThetaVideoIE(InfoExtractor):
  59     _VALID_URL = r'https?://(?:www\.)?theta\.tv/video/(?P<id>vid[a-z0-9]+)'
  60     _TEST = {
  61         'url': 'https://www.theta.tv/video/vidiq6aaet3kzf799p0',
  62         'md5': '633d8c29eb276bb38a111dbd591c677f',
  63         'info_dict': {
  64             'id': 'vidiq6aaet3kzf799p0',
  65             'ext': 'mp4',
  66             'title': 'Theta EdgeCast Tutorial',
  67             'uploader': 'Pixiekittie',
  68             'description': 'md5:e316253f5bdced8b5a46bb50ae60a09f',
  69             'thumbnail': r're:https://user-prod-theta-tv\.imgix\.net/.+/vod_thumb/.+.jpg',
  70         }
  71     }
  72
  73     def _real_extract(self, url):
  74         video_id = self._match_id(url)
  75         info = self._download_json(f'https://api.theta.tv/v1/video/{video_id}/raw', video_id)['body']
  76
  77         m3u8_playlist = try_get(info, lambda x: x['video_urls'][0]['url'])
  78
  79         formats = self._extract_m3u8_formats(m3u8_playlist, video_id, 'mp4', m3u8_id='hls')
  80
  81         return {
  82             'id': video_id,
  83             'title': info.get('title'),
  84             'uploader': try_get(info, lambda x: x['user']['username']),
  85             'description': info.get('description'),
  86             'view_count': info.get('view_count'),
  87             'like_count': info.get('like_count'),
  88             'formats': formats,
  89             'thumbnail': info.get('thumbnail_url'),
  90         }