[yt-dlp.git] / yt_dlp / extractor / theta.py

# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    try_get,
)


class ThetaStreamIE(InfoExtractor):
    """Extractor for theta.tv channel pages (any path not under ``/video/``), treated as live streams."""

    _VALID_URL = r'https?://(?:www\.)?theta\.tv/(?!video/)(?P<id>[a-z0-9]+)'
    _TESTS = [{
        'url': 'https://www.theta.tv/davirus',
        'skip': 'The live may have ended',
        'info_dict': {
            'id': 'DaVirus',
            'ext': 'mp4',
            'title': 'I choose you - My Community is King -👀 - YO HABLO ESPANOL - CODE DAVIRUS',
            'thumbnail': r're:https://live-thumbnails-prod-theta-tv\.imgix\.net/thumbnail/.+\.jpg',
        }
    }, {
        'url': 'https://www.theta.tv/mst3k',
        'note': 'This channel is live 24/7',
        'info_dict': {
            'id': 'MST3K',
            'ext': 'mp4',
            'title': 'Mystery Science Theatre 3000 24/7 Powered by the THETA Network.',
            'thumbnail': r're:https://user-prod-theta-tv\.imgix\.net/.+\.jpg',
        }
    }]

    def _real_extract(self, url):
        """Look up the channel named in ``url`` through the theta.tv API and return its live-stream info dict.

        Raises an expected ExtractorError when the API response lists no
        non-embed stream URL with resolution 'master' or 'source'.
        """
        channel_id = self._match_id(url)
        info = self._download_json(f'https://api.theta.tv/v1/channel?alias={channel_id}', channel_id)['body']

        # 'live_stream' / 'video_urls' are looked up defensively so that a missing or
        # null value yields a clear error below instead of a raw TypeError/KeyError.
        # NOTE(review): presumably this is what the API returns for an offline channel - confirm.
        video_urls = try_get(info, lambda x: x['live_stream']['video_urls'], list) or []

        # A default is passed to next() so that "no matching entry" does not leak
        # a bare StopIteration out of the extractor.
        m3u8_playlist = next((
            data['url'] for data in video_urls
            if isinstance(data, dict) and data.get('url')
            and data.get('type') != 'embed' and data.get('resolution') in ('master', 'source')), None)
        if not m3u8_playlist:
            raise ExtractorError(
                'No live stream found; the channel may be offline', expected=True, video_id=channel_id)

        formats = self._extract_m3u8_formats(m3u8_playlist, channel_id, 'mp4', m3u8_id='hls', live=True)
        self._sort_formats(formats)

        channel = try_get(info, lambda x: x['user']['username'])  # using this field instead of channel_id due to capitalization

        return {
            # Fall back to the URL alias so that 'id' is never None when the username is absent
            'id': channel or channel_id,
            'title': try_get(info, lambda x: x['live_stream']['title']),
            'channel': channel,
            'view_count': try_get(info, lambda x: x['live_stream']['view_count']),
            'is_live': True,
            'formats': formats,
            'thumbnail': try_get(info, lambda x: x['live_stream']['thumbnail_url']),
        }


class ThetaVideoIE(InfoExtractor):
    """Extractor for theta.tv video pages (``/video/vid...`` URLs)."""

    _VALID_URL = r'https?://(?:www\.)?theta\.tv/video/(?P<id>vid[a-z0-9]+)'
    _TEST = {
        'url': 'https://www.theta.tv/video/vidiq6aaet3kzf799p0',
        'md5': '633d8c29eb276bb38a111dbd591c677f',
        'info_dict': {
            'id': 'vidiq6aaet3kzf799p0',
            'ext': 'mp4',
            'title': 'Theta EdgeCast Tutorial',
            'uploader': 'Pixiekittie',
            'description': 'md5:e316253f5bdced8b5a46bb50ae60a09f',
            'thumbnail': r're:https://user-prod-theta-tv\.imgix\.net/.+/vod_thumb/.+.jpg',
        }
    }

    def _real_extract(self, url):
        """Fetch the raw video record for the ID in ``url`` from the theta.tv API and return its info dict.

        Raises ExtractorError when the API response carries no URL in the
        first entry of 'video_urls'.
        """
        video_id = self._match_id(url)
        info = self._download_json(f'https://api.theta.tv/v1/video/{video_id}/raw', video_id)['body']

        # Only the first entry of 'video_urls' is used as the HLS playlist.
        m3u8_playlist = try_get(info, lambda x: x['video_urls'][0]['url'])
        if not m3u8_playlist:
            # Fail with a clear message instead of passing None on to the m3u8 downloader
            raise ExtractorError('Unable to find a playlist URL in the API response', video_id=video_id)

        formats = self._extract_m3u8_formats(m3u8_playlist, video_id, 'mp4', m3u8_id='hls')
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': info.get('title'),
            'uploader': try_get(info, lambda x: x['user']['username']),
            'description': info.get('description'),
            'view_count': info.get('view_count'),
            'like_count': info.get('like_count'),
            'formats': formats,
            'thumbnail': info.get('thumbnail_url'),
        }
Commit	Line	Data
eb6d4ad1 AK	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	from .common import InfoExtractor
	5	from ..utils import try_get
	6
	7
0eaec13b AK	8	class ThetaStreamIE(InfoExtractor):
0eaec13b AK	9	_VALID_URL = r'https?://(?:www\.)?theta\.tv/(?!video/)(?P<id>[a-z0-9]+)'
eb6d4ad1 AK	10	_TESTS = [{
	11	'url': 'https://www.theta.tv/davirus',
	12	'skip': 'The live may have ended',
	13	'info_dict': {
	14	'id': 'DaVirus',
	15	'ext': 'mp4',
	16	'title': 'I choose you - My Community is King -👀 - YO HABLO ESPANOL - CODE DAVIRUS',
	17	'thumbnail': r're:https://live-thumbnails-prod-theta-tv\.imgix\.net/thumbnail/.+\.jpg',
	18	}
	19	}, {
	20	'url': 'https://www.theta.tv/mst3k',
	21	'note': 'This channel is live 24/7',
	22	'info_dict': {
	23	'id': 'MST3K',
	24	'ext': 'mp4',
	25	'title': 'Mystery Science Theatre 3000 24/7 Powered by the THETA Network.',
	26	'thumbnail': r're:https://user-prod-theta-tv\.imgix\.net/.+\.jpg',
	27	}
	28	}]
	29
	30	def _real_extract(self, url):
	31	channel_id = self._match_id(url)
	32	info = self._download_json(f'https://api.theta.tv/v1/channel?alias={channel_id}', channel_id)['body']
	33
	34	m3u8_playlist = next(
	35	data['url'] for data in info['live_stream']['video_urls']
	36	if data.get('type') != 'embed' and data.get('resolution') in ('master', 'source'))
	37
	38	formats = self._extract_m3u8_formats(m3u8_playlist, channel_id, 'mp4', m3u8_id='hls', live=True)
	39	self._sort_formats(formats)
	40
	41	channel = try_get(info, lambda x: x['user']['username']) # using this field instead of channel_id due to capitalization
	42
	43	return {
	44	'id': channel,
	45	'title': try_get(info, lambda x: x['live_stream']['title']),
	46	'channel': channel,
	47	'view_count': try_get(info, lambda x: x['live_stream']['view_count']),
	48	'is_live': True,
	49	'formats': formats,
	50	'thumbnail': try_get(info, lambda x: x['live_stream']['thumbnail_url']),
	51	}
0eaec13b AK	52
	53
	54	class ThetaVideoIE(InfoExtractor):
	55	_VALID_URL = r'https?://(?:www\.)?theta\.tv/video/(?P<id>vid[a-z0-9]+)'
	56	_TEST = {
	57	'url': 'https://www.theta.tv/video/vidiq6aaet3kzf799p0',
	58	'md5': '633d8c29eb276bb38a111dbd591c677f',
	59	'info_dict': {
	60	'id': 'vidiq6aaet3kzf799p0',
	61	'ext': 'mp4',
	62	'title': 'Theta EdgeCast Tutorial',
	63	'uploader': 'Pixiekittie',
	64	'description': 'md5:e316253f5bdced8b5a46bb50ae60a09f',
	65	'thumbnail': r're:https://user-prod-theta-tv\.imgix\.net/.+/vod_thumb/.+.jpg',
	66	}
	67	}
	68
	69	def _real_extract(self, url):
	70	video_id = self._match_id(url)
	71	info = self._download_json(f'https://api.theta.tv/v1/video/{video_id}/raw', video_id)['body']
	72
	73	m3u8_playlist = try_get(info, lambda x: x['video_urls'][0]['url'])
	74
	75	formats = self._extract_m3u8_formats(m3u8_playlist, video_id, 'mp4', m3u8_id='hls')
	76	self._sort_formats(formats)
	77
	78	return {
	79	'id': video_id,
	80	'title': info.get('title'),
	81	'uploader': try_get(info, lambda x: x['user']['username']),
	82	'description': info.get('description'),
	83	'view_count': info.get('view_count'),
	84	'like_count': info.get('like_count'),
	85	'formats': formats,
	86	'thumbnail': info.get('thumbnail_url'),
	87	}