yt_dlp/extractor/theta.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    try_get,
)
   6
   7
   8 class ThetaStreamIE(InfoExtractor):
   9     _VALID_URL = r'https?://(?:www\.)?theta\.tv/(?!video/)(?P<id>[a-z0-9-]+)'
  10     _TESTS = [{
  11         'url': 'https://www.theta.tv/davirus',
  12         'skip': 'The live may have ended',
  13         'info_dict': {
  14             'id': 'DaVirus',
  15             'ext': 'mp4',
  16             'title': 'I choose you - My Community is King -👀 - YO HABLO ESPANOL - CODE DAVIRUS',
  17             'thumbnail': r're:https://live-thumbnails-prod-theta-tv\.imgix\.net/thumbnail/.+\.jpg',
  18         }
  19     }, {
  20         'url': 'https://www.theta.tv/mst3k',
  21         'note': 'This channel is live 24/7',
  22         'info_dict': {
  23             'id': 'MST3K',
  24             'ext': 'mp4',
  25             'title': 'Mystery Science Theatre 3000 24/7 Powered by the THETA Network.',
  26             'thumbnail': r're:https://user-prod-theta-tv\.imgix\.net/.+\.jpg',
  27         }
  28     }, {
  29         'url': 'https://www.theta.tv/contv-anime',
  30         'info_dict': {
  31             'id': 'ConTVAnime',
  32             'ext': 'mp4',
  33             'title': 'CONTV ANIME 24/7. Powered by THETA Network.',
  34             'thumbnail': r're:https://user-prod-theta-tv\.imgix\.net/.+\.jpg',
  35         }
  36     }]
  37
  38     def _real_extract(self, url):
  39         channel_id = self._match_id(url)
  40         info = self._download_json(f'https://api.theta.tv/v1/channel?alias={channel_id}', channel_id)['body']
  41
  42         m3u8_playlist = next(
  43             data['url'] for data in info['live_stream']['video_urls']
  44             if data.get('type') != 'embed' and data.get('resolution') in ('master', 'source'))
  45
  46         formats = self._extract_m3u8_formats(m3u8_playlist, channel_id, 'mp4', m3u8_id='hls', live=True)
  47         self._sort_formats(formats)
  48
  49         channel = try_get(info, lambda x: x['user']['username'])  # using this field instead of channel_id due to capitalization
  50
  51         return {
  52             'id': channel,
  53             'title': try_get(info, lambda x: x['live_stream']['title']),
  54             'channel': channel,
  55             'view_count': try_get(info, lambda x: x['live_stream']['view_count']),
  56             'is_live': True,
  57             'formats': formats,
  58             'thumbnail': try_get(info, lambda x: x['live_stream']['thumbnail_url']),
  59         }
  60
  61
  62 class ThetaVideoIE(InfoExtractor):
  63     _VALID_URL = r'https?://(?:www\.)?theta\.tv/video/(?P<id>vid[a-z0-9]+)'
  64     _TEST = {
  65         'url': 'https://www.theta.tv/video/vidiq6aaet3kzf799p0',
  66         'md5': '633d8c29eb276bb38a111dbd591c677f',
  67         'info_dict': {
  68             'id': 'vidiq6aaet3kzf799p0',
  69             'ext': 'mp4',
  70             'title': 'Theta EdgeCast Tutorial',
  71             'uploader': 'Pixiekittie',
  72             'description': 'md5:e316253f5bdced8b5a46bb50ae60a09f',
  73             'thumbnail': r're:https://user-prod-theta-tv\.imgix\.net/.+/vod_thumb/.+.jpg',
  74         }
  75     }
  76
  77     def _real_extract(self, url):
  78         video_id = self._match_id(url)
  79         info = self._download_json(f'https://api.theta.tv/v1/video/{video_id}/raw', video_id)['body']
  80
  81         m3u8_playlist = try_get(info, lambda x: x['video_urls'][0]['url'])
  82
  83         formats = self._extract_m3u8_formats(m3u8_playlist, video_id, 'mp4', m3u8_id='hls')
  84         self._sort_formats(formats)
  85
  86         return {
  87             'id': video_id,
  88             'title': info.get('title'),
  89             'uploader': try_get(info, lambda x: x['user']['username']),
  90             'description': info.get('description'),
  91             'view_count': info.get('view_count'),
  92             'like_count': info.get('like_count'),
  93             'formats': formats,
  94             'thumbnail': info.get('thumbnail_url'),
  95         }