[yt-dlp.git] / youtube_dl / extractor / beampro.py

# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    clean_html,
    compat_str,
    float_or_none,
    int_or_none,
    parse_iso8601,
    try_get,
    urljoin,
)


class BeamProBaseIE(InfoExtractor):
    """Base class holding the API prefix and the channel-metadata helper."""

    # Prefix of every API URL built by the extractors in this file.
    _API_BASE = 'https://mixer.com/api/v1'
    # Translates a channel's 'audience' value into an 'age_limit'.
    _RATINGS = {'family': 0, 'teen': 13, '18+': 18}

    def _extract_channel_info(self, chan):
        """Return the uploader-related info fields found in a channel dict.

        The result always has the keys 'uploader', 'uploader_id' and
        'age_limit'; each one is None when the corresponding data is absent
        from ``chan`` (or, for 'age_limit', when the audience is not listed
        in ``_RATINGS``).
        """
        # Prefer the top-level 'token'; otherwise use the nested user name.
        uploader = chan.get('token')
        if not uploader:
            uploader = try_get(
                chan, lambda c: c['user']['username'], compat_str)

        # Same preference order for the numeric user id.
        user_id = chan.get('userId')
        if not user_id:
            user_id = try_get(chan, lambda c: c['user']['id'])

        uploader_id = None
        if user_id:
            uploader_id = compat_str(user_id)

        audience = chan.get('audience')

        return {
            'uploader': uploader,
            'uploader_id': uploader_id,
            'age_limit': self._RATINGS.get(audience),
        }


class BeamProLiveIE(BeamProBaseIE):
    """Extractor for live channels addressed by beam.pro / mixer.com URLs."""

    IE_NAME = 'Mixer:live'
    _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'http://mixer.com/niterhayven',
        'info_dict': {
            'id': '261562',
            'ext': 'mp4',
            'title': 'Introducing The Witcher 3 //  The Grind Starts Now!',
            'description': 'md5:0b161ac080f15fe05d18a07adb44a74d',
            'thumbnail': r're:https://.*\.jpg$',
            'timestamp': 1483477281,
            'upload_date': '20170103',
            'uploader': 'niterhayven',
            'uploader_id': '373396',
            'age_limit': 18,
            'is_live': True,
            'view_count': int,
        },
        'skip': 'niterhayven is offline',
        'params': {
            'skip_download': True,
        },
    }

    # Two placeholders remain after the API base is filled in:
    # the channel id and the manifest kind ('m3u8' or 'smil').
    _MANIFEST_URL_TEMPLATE = '%s/channels/%%s/manifest.%%s' % BeamProBaseIE._API_BASE

    @classmethod
    def suitable(cls, url):
        """Reject URLs that BeamProVodIE accepts; otherwise defer to the base check.

        The live pattern also matches VOD URLs (same path, extra ``vod=``
        query parameter), so the VOD extractor is given priority.
        """
        if BeamProVodIE.suitable(url):
            return False
        return super(BeamProLiveIE, cls).suitable(url)

    def _real_extract(self, url):
        """Fetch the channel description and build the live-stream info dict.

        Raises ExtractorError (expected) when the API reports the channel
        as offline.
        """
        channel_name = self._match_id(url)

        channel_api_url = '%s/channels/%s' % (self._API_BASE, channel_name)
        channel = self._download_json(channel_api_url, channel_name)

        # Only an explicit False counts as offline; a missing key does not.
        if channel.get('online') is False:
            raise ExtractorError(
                '{0} is offline'.format(channel_name), expected=True)

        channel_id = channel['id']
        hls_manifest = self._MANIFEST_URL_TEMPLATE % (channel_id, 'm3u8')
        smil_manifest = self._MANIFEST_URL_TEMPLATE % (channel_id, 'smil')

        # Both manifests are optional (fatal=False); HLS is requested first.
        formats = self._extract_m3u8_formats(
            hls_manifest, channel_name, ext='mp4', m3u8_id='hls',
            fatal=False)
        smil_formats = self._extract_smil_formats(
            smil_manifest, channel_name, fatal=False)
        formats.extend(smil_formats)
        self._sort_formats(formats)

        video_id = compat_str(channel.get('id') or channel_name)
        title = self._live_title(channel.get('name') or channel_name)
        thumbnail = try_get(
            channel, lambda c: c['thumbnail']['url'], compat_str)

        info = {
            'id': video_id,
            'title': title,
            'description': clean_html(channel.get('description')),
            'thumbnail': thumbnail,
            'timestamp': parse_iso8601(channel.get('updatedAt')),
            'is_live': True,
            'view_count': int_or_none(channel.get('viewersTotal')),
            'formats': formats,
        }
        # Adds 'uploader', 'uploader_id' and 'age_limit'.
        info.update(self._extract_channel_info(channel))

        return info


class BeamProVodIE(BeamProBaseIE):
    """Extractor for recordings addressed by a ``?vod=<id>`` URL parameter."""

    IE_NAME = 'Mixer:vod'
    _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>\d+)'
    _TEST = {
        'url': 'https://mixer.com/willow8714?vod=2259830',
        'md5': 'b2431e6e8347dc92ebafb565d368b76b',
        'info_dict': {
            'id': '2259830',
            'ext': 'mp4',
            'title': 'willow8714\'s Channel',
            'duration': 6828.15,
            'thumbnail': r're:https://.*source\.png$',
            'timestamp': 1494046474,
            'upload_date': '20170506',
            'uploader': 'willow8714',
            'uploader_id': '6085379',
            'age_limit': 13,
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }

    @staticmethod
    def _extract_format(vod, vod_type):
        """Build the format entry for one 'hls' or 'raw' vod description.

        Returns a list so the caller can ``extend`` with it: one format dict,
        or an empty list when ``vod`` carries no 'baseUrl'.

        Raises ValueError when ``vod_type`` is neither 'hls' nor 'raw'.
        """
        if not vod.get('baseUrl'):
            return []

        if vod_type == 'hls':
            filename, protocol = 'manifest.m3u8', 'm3u8_native'
        elif vod_type == 'raw':
            filename, protocol = 'source.mp4', 'https'
        else:
            # An explicit raise instead of `assert False`: asserts are removed
            # under `python -O`, which would let execution continue and fail
            # later with an unrelated UnboundLocalError on `filename`.
            raise ValueError('Unsupported VOD format type: %r' % (vod_type,))

        # Anything other than a dict is treated as "no metadata".
        data = vod.get('data')
        if not isinstance(data, dict):
            data = {}

        # The height suffix is only added when the API supplies it as a string.
        format_id = [vod_type]
        if isinstance(data.get('Height'), compat_str):
            format_id.append('%sp' % data['Height'])

        return [{
            'url': urljoin(vod['baseUrl'], filename),
            'format_id': '-'.join(format_id),
            'ext': 'mp4',
            'protocol': protocol,
            'width': int_or_none(data.get('Width')),
            'height': int_or_none(data.get('Height')),
            'fps': int_or_none(data.get('Fps')),
            # 1000 is passed as int_or_none's second positional argument;
            # presumably a scale turning bit/s into kbit/s - TODO confirm.
            'tbr': int_or_none(data.get('Bitrate'), 1000),
        }]

    def _real_extract(self, url):
        """Fetch the recording description and build the VOD info dict.

        Raises ExtractorError (expected) when the recording's state is not
        'AVAILABLE'.
        """
        vod_id = self._match_id(url)

        vod_info = self._download_json(
            '%s/recordings/%s' % (self._API_BASE, vod_id), vod_id)

        state = vod_info.get('state')
        if state != 'AVAILABLE':
            raise ExtractorError(
                'VOD %s is not available (state: %s)' % (vod_id, state),
                expected=True)

        formats = []
        thumbnail_url = None

        # A missing or null 'vods' list is treated as empty so the failure
        # surfaces through _sort_formats below rather than as a bare
        # KeyError/TypeError here.
        for vod in vod_info.get('vods') or []:
            if not isinstance(vod, dict):
                # Skip malformed entries instead of crashing on .get().
                continue
            vod_type = vod.get('format')
            if vod_type in ('hls', 'raw'):
                formats.extend(self._extract_format(vod, vod_type))
            elif vod_type == 'thumbnail':
                thumbnail_url = urljoin(vod.get('baseUrl'), 'source.png')

        self._sort_formats(formats)

        info = {
            'id': vod_id,
            'title': vod_info.get('name') or vod_id,
            'duration': float_or_none(vod_info.get('duration')),
            'thumbnail': thumbnail_url,
            'timestamp': parse_iso8601(vod_info.get('createdAt')),
            'view_count': int_or_none(vod_info.get('viewsTotal')),
            'formats': formats,
        }
        # Adds 'uploader', 'uploader_id' and 'age_limit'; tolerates a
        # missing or null 'channel' object.
        info.update(self._extract_channel_info(vod_info.get('channel') or {}))

        return info
Commit	Line	Data
cd55c6cc	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	from .common import InfoExtractor
	5	from ..utils import (
	6	ExtractorError,
	7	clean_html,
	8	compat_str,
1e0d65f0	9	float_or_none,
cd55c6cc	10	int_or_none,
	11	parse_iso8601,
	12	try_get,
1e0d65f0	13	urljoin,
cd55c6cc	14	)
	15
	16
1e0d65f0	17	class BeamProBaseIE(InfoExtractor):
6bceb36b	18	_API_BASE = 'https://mixer.com/api/v1'
1e0d65f0 MF	19	_RATINGS = {'family': 0, 'teen': 13, '18+': 18}
	20
	21	def _extract_channel_info(self, chan):
	22	user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id'])
	23	return {
	24	'uploader': chan.get('token') or try_get(
	25	chan, lambda x: x['user']['username'], compat_str),
	26	'uploader_id': compat_str(user_id) if user_id else None,
	27	'age_limit': self._RATINGS.get(chan.get('audience')),
	28	}
	29
	30
	31	class BeamProLiveIE(BeamProBaseIE):
6bceb36b S	32	IE_NAME = 'Mixer:live'
6bceb36b S	33	_VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro\|mixer\.com)/(?P<id>[^/?#&]+)'
cd55c6cc	34	_TEST = {
6bceb36b	35	'url': 'http://mixer.com/niterhayven',
cd55c6cc	36	'info_dict': {
	37	'id': '261562',
	38	'ext': 'mp4',
cd55c6cc	39	'title': 'Introducing The Witcher 3 // The Grind Starts Now!',
af62de10	40	'description': 'md5:0b161ac080f15fe05d18a07adb44a74d',
cd55c6cc	41	'thumbnail': r're:https://.*\.jpg$',
af62de10	42	'timestamp': 1483477281,
cd55c6cc	43	'upload_date': '20170103',
af62de10 S	44	'uploader': 'niterhayven',
	45	'uploader_id': '373396',
	46	'age_limit': 18,
cd55c6cc	47	'is_live': True,
af62de10	48	'view_count': int,
cd55c6cc	49	},
	50	'skip': 'niterhayven is offline',
	51	'params': {
	52	'skip_download': True,
	53	},
	54	}
	55
6bceb36b S	56	_MANIFEST_URL_TEMPLATE = '%s/channels/%%s/manifest.%%s' % BeamProBaseIE._API_BASE
6bceb36b S	57
1e0d65f0 MF	58	@classmethod
	59	def suitable(cls, url):
	60	return False if BeamProVodIE.suitable(url) else super(BeamProLiveIE, cls).suitable(url)
	61
cd55c6cc	62	def _real_extract(self, url):
af62de10	63	channel_name = self._match_id(url)
cd55c6cc	64
af62de10	65	chan = self._download_json(
6bceb36b	66	'%s/channels/%s' % (self._API_BASE, channel_name), channel_name)
cd55c6cc	67
af62de10 S	68	if chan.get('online') is False:
	69	raise ExtractorError(
	70	'{0} is offline'.format(channel_name), expected=True)
cd55c6cc	71
af62de10	72	channel_id = chan['id']
cd55c6cc	73
6bceb36b S	74	def manifest_url(kind):
	75	return self._MANIFEST_URL_TEMPLATE % (channel_id, kind)
	76
af62de10	77	formats = self._extract_m3u8_formats(
6bceb36b S	78	manifest_url('m3u8'), channel_name, ext='mp4', m3u8_id='hls',
	79	fatal=False)
	80	formats.extend(self._extract_smil_formats(
	81	manifest_url('smil'), channel_name, fatal=False))
af62de10	82	self._sort_formats(formats)
cd55c6cc	83
1e0d65f0	84	info = {
af62de10 S	85	'id': compat_str(chan.get('id') or channel_name),
	86	'title': self._live_title(chan.get('name') or channel_name),
	87	'description': clean_html(chan.get('description')),
6bceb36b S	88	'thumbnail': try_get(
6bceb36b S	89	chan, lambda x: x['thumbnail']['url'], compat_str),
af62de10	90	'timestamp': parse_iso8601(chan.get('updatedAt')),
af62de10 S	91	'is_live': True,
	92	'view_count': int_or_none(chan.get('viewersTotal')),
	93	'formats': formats,
cd55c6cc	94	}
1e0d65f0 MF	95	info.update(self._extract_channel_info(chan))
	96
	97	return info
	98
	99
	100	class BeamProVodIE(BeamProBaseIE):
6bceb36b S	101	IE_NAME = 'Mixer:vod'
6bceb36b S	102	_VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro\|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>\d+)'
1e0d65f0	103	_TEST = {
6bceb36b	104	'url': 'https://mixer.com/willow8714?vod=2259830',
1e0d65f0 MF	105	'md5': 'b2431e6e8347dc92ebafb565d368b76b',
	106	'info_dict': {
	107	'id': '2259830',
	108	'ext': 'mp4',
	109	'title': 'willow8714\'s Channel',
	110	'duration': 6828.15,
	111	'thumbnail': r're:https://.*source\.png$',
	112	'timestamp': 1494046474,
	113	'upload_date': '20170506',
	114	'uploader': 'willow8714',
	115	'uploader_id': '6085379',
	116	'age_limit': 13,
	117	'view_count': int,
	118	},
6bceb36b S	119	'params': {
	120	'skip_download': True,
	121	},
1e0d65f0 MF	122	}
1e0d65f0 MF	123
6bceb36b S	124	@staticmethod
6bceb36b S	125	def _extract_format(vod, vod_type):
1e0d65f0 MF	126	if not vod.get('baseUrl'):
	127	return []
	128
	129	if vod_type == 'hls':
6bceb36b	130	filename, protocol = 'manifest.m3u8', 'm3u8_native'
1e0d65f0 MF	131	elif vod_type == 'raw':
	132	filename, protocol = 'source.mp4', 'https'
	133	else:
6bceb36b	134	assert False
1e0d65f0	135
6bceb36b	136	data = vod.get('data') if isinstance(vod.get('data'), dict) else {}
1e0d65f0 MF	137
1e0d65f0 MF	138	format_id = [vod_type]
6bceb36b	139	if isinstance(data.get('Height'), compat_str):
1e0d65f0 MF	140	format_id.append('%sp' % data['Height'])
	141
	142	return [{
	143	'url': urljoin(vod['baseUrl'], filename),
	144	'format_id': '-'.join(format_id),
	145	'ext': 'mp4',
	146	'protocol': protocol,
	147	'width': int_or_none(data.get('Width')),
	148	'height': int_or_none(data.get('Height')),
	149	'fps': int_or_none(data.get('Fps')),
	150	'tbr': int_or_none(data.get('Bitrate'), 1000),
	151	}]
	152
	153	def _real_extract(self, url):
	154	vod_id = self._match_id(url)
	155
	156	vod_info = self._download_json(
6bceb36b	157	'%s/recordings/%s' % (self._API_BASE, vod_id), vod_id)
1e0d65f0 MF	158
	159	state = vod_info.get('state')
	160	if state != 'AVAILABLE':
	161	raise ExtractorError(
6bceb36b S	162	'VOD %s is not available (state: %s)' % (vod_id, state),
6bceb36b S	163	expected=True)
1e0d65f0 MF	164
	165	formats = []
	166	thumbnail_url = None
	167
	168	for vod in vod_info['vods']:
	169	vod_type = vod.get('format')
	170	if vod_type in ('hls', 'raw'):
	171	formats.extend(self._extract_format(vod, vod_type))
	172	elif vod_type == 'thumbnail':
	173	thumbnail_url = urljoin(vod.get('baseUrl'), 'source.png')
	174
	175	self._sort_formats(formats)
	176
	177	info = {
	178	'id': vod_id,
	179	'title': vod_info.get('name') or vod_id,
	180	'duration': float_or_none(vod_info.get('duration')),
	181	'thumbnail': thumbnail_url,
	182	'timestamp': parse_iso8601(vod_info.get('createdAt')),
	183	'view_count': int_or_none(vod_info.get('viewsTotal')),
	184	'formats': formats,
	185	}
6bceb36b	186	info.update(self._extract_channel_info(vod_info.get('channel') or {}))
1e0d65f0 MF	187
1e0d65f0 MF	188	return info