[yt-dlp.git] / yt_dlp / extractor / picarto.py

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    js_to_json,
)


class PicartoIE(InfoExtractor):
    """Extractor for live channel pages of the form ``picarto.tv/<channel>``.

    URLs accepted by ``PicartoVodIE`` are explicitly declined in ``suitable``.
    """

    # The dot after "www" is escaped so that only a literal "www." prefix
    # matches (an unescaped "." would accept any character in that position).
    _VALID_URL = r'https?://(?:www\.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
    _TEST = {
        'url': 'https://picarto.tv/Setz',
        'info_dict': {
            'id': 'Setz',
            'ext': 'mp4',
            'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'timestamp': int,
            'is_live': True
        },
        'skip': 'Stream is offline',
    }

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle *url*.

        The ``id`` group of ``_VALID_URL`` would also accept the
        ``videopopout`` path segment, so anything ``PicartoVodIE`` claims is
        rejected here before falling back to the inherited check.
        """
        if PicartoVodIE.suitable(url):
            return False
        return super(PicartoIE, cls).suitable(url)

    def _real_extract(self, url):
        """Build the info dict for the live stream of the channel in *url*.

        Queries the ``ptvapi`` endpoint for channel metadata and the load
        balancer URL, then downloads the per-stream JSON listing the sources.

        Raises:
            ExtractorError: (expected) if the API returns no channel data or
                reports the channel as offline.
        """
        channel_id = self._match_id(url)

        data = self._download_json(
            'https://ptvintern.picarto.tv/ptvapi', channel_id, query={
                'query': '''{
  channel(name: "%s") {
    adult
    id
    online
    stream_name
    title
  }
  getLoadBalancerUrl(channel_name: "%s") {
    url
  }
}''' % (channel_id, channel_id),
            })['data']
        metadata = data['channel']

        # Guard against a null/empty "channel" object; without this the
        # ``.get`` below would fail with an opaque AttributeError.
        # NOTE(review): presumably the API returns null for unknown channels - confirm.
        if not metadata:
            raise ExtractorError('Channel not found', expected=True)

        if metadata.get('online') == 0:
            raise ExtractorError('Stream is offline', expected=True)
        title = metadata['title']

        cdn_data = self._download_json(
            data['getLoadBalancerUrl']['url'] + '/stream/json_' + metadata['stream_name'] + '.js',
            channel_id, 'Downloading load balancing info')

        formats = []
        for source in (cdn_data.get('source') or []):
            source_url = source.get('url')
            if not source_url:
                continue
            source_type = source.get('type')
            if source_type == 'html5/application/vnd.apple.mpegurl':
                # HLS manifest: non-fatal so one broken source does not abort extraction.
                formats.extend(self._extract_m3u8_formats(
                    source_url, channel_id, 'mp4', m3u8_id='hls', fatal=False))
            elif source_type == 'html5/video/mp4':
                formats.append({
                    'url': source_url,
                })
            # Sources of any other type are ignored.
        self._sort_formats(formats)

        # "adult" missing/null -> unknown age limit; only a literal True maps to 18.
        mature = metadata.get('adult')
        if mature is None:
            age_limit = None
        else:
            age_limit = 18 if mature is True else 0

        return {
            'id': channel_id,
            'title': title.strip(),
            'is_live': True,
            'channel': channel_id,
            'channel_id': metadata.get('id'),
            'channel_url': 'https://picarto.tv/%s' % channel_id,
            'age_limit': age_limit,
            'formats': formats,
        }


class PicartoVodIE(InfoExtractor):
    """Extractor for VOD pop-out pages of the form ``picarto.tv/videopopout/<id>``."""

    # The dot after "www" is escaped so that only a literal "www." prefix
    # matches (an unescaped "." would accept any character in that position).
    _VALID_URL = r'https?://(?:www\.)?picarto\.tv/videopopout/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv',
        'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',
        'info_dict': {
            'id': 'ArtofZod_2017.12.12.00.13.23.flv',
            'ext': 'mp4',
            'title': 'ArtofZod_2017.12.12.00.13.23.flv',
            'thumbnail': r're:^https?://.*\.jpg'
        },
    }, {
        'url': 'https://picarto.tv/videopopout/Plague',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the VOD referenced by *url*.

        Downloads the pop-out page, pulls the JS object passed alongside
        ``#vod-player`` out of the markup, and extracts HLS formats from its
        ``vod`` entry. The video id doubles as the title.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The player options are a JS object literal, hence js_to_json
        # before parsing.
        vod_info = self._parse_json(
            self._search_regex(
                r'(?s)#vod-player["\']\s*,\s*(\{.+?\})\s*\)', webpage,
                'vod player'),
            video_id, transform_source=js_to_json)

        formats = self._extract_m3u8_formats(
            vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='hls')
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_id,
            'thumbnail': vod_info.get('vodThumb'),
            'formats': formats,
        }
Commit	Line	Data
d6166a76	1	from .common import InfoExtractor
a42839e5 S	2	from ..utils import (
	3	ExtractorError,
	4	js_to_json,
a42839e5	5	)
d6166a76 PG	6
	7
	8	class PicartoIE(InfoExtractor):
cce889b9	9	_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
d6166a76 PG	10	_TEST = {
	11	'url': 'https://picarto.tv/Setz',
	12	'info_dict': {
	13	'id': 'Setz',
	14	'ext': 'mp4',
	15	'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
	16	'timestamp': int,
	17	'is_live': True
	18	},
a42839e5	19	'skip': 'Stream is offline',
d6166a76 PG	20	}
d6166a76 PG	21
a42839e5 S	22	@classmethod
	23	def suitable(cls, url):
	24	return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
	25
d6166a76	26	def _real_extract(self, url):
cce889b9	27	channel_id = self._match_id(url)
	28
	29	data = self._download_json(
	30	'https://ptvintern.picarto.tv/ptvapi', channel_id, query={
	31	'query': '''{
	32	channel(name: "%s") {
	33	adult
	34	id
	35	online
	36	stream_name
	37	title
	38	}
	39	getLoadBalancerUrl(channel_name: "%s") {
	40	url
	41	}
	42	}''' % (channel_id, channel_id),
	43	})['data']
	44	metadata = data['channel']
	45
	46	if metadata.get('online') == 0:
d6166a76	47	raise ExtractorError('Stream is offline', expected=True)
cce889b9	48	title = metadata['title']
d6166a76	49
a42839e5	50	cdn_data = self._download_json(
cce889b9	51	data['getLoadBalancerUrl']['url'] + '/stream/json_' + metadata['stream_name'] + '.js',
cce889b9	52	channel_id, 'Downloading load balancing info')
a42839e5	53
a42839e5	54	formats = []
cce889b9	55	for source in (cdn_data.get('source') or []):
	56	source_url = source.get('url')
	57	if not source_url:
a42839e5	58	continue
cce889b9	59	source_type = source.get('type')
	60	if source_type == 'html5/application/vnd.apple.mpegurl':
	61	formats.extend(self._extract_m3u8_formats(
	62	source_url, channel_id, 'mp4', m3u8_id='hls', fatal=False))
	63	elif source_type == 'html5/video/mp4':
	64	formats.append({
	65	'url': source_url,
	66	})
d6166a76 PG	67	self._sort_formats(formats)
d6166a76 PG	68
f17a24a6	69	mature = metadata.get('adult')
a42839e5 S	70	if mature is None:
	71	age_limit = None
	72	else:
	73	age_limit = 18 if mature is True else 0
	74
d6166a76 PG	75	return {
d6166a76 PG	76	'id': channel_id,
39ca3b5c	77	'title': title.strip(),
d6166a76	78	'is_live': True,
730c0d12	79	'channel': channel_id,
cce889b9	80	'channel_id': metadata.get('id'),
730c0d12	81	'channel_url': 'https://picarto.tv/%s' % channel_id,
a42839e5 S	82	'age_limit': age_limit,
a42839e5 S	83	'formats': formats,
d6166a76 PG	84	}
	85
	86
	87	class PicartoVodIE(InfoExtractor):
a42839e5 S	88	_VALID_URL = r'https?://(?:www.)?picarto\.tv/videopopout/(?P<id>[^/?#&]+)'
	89	_TESTS = [{
	90	'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv',
	91	'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',
d6166a76	92	'info_dict': {
a42839e5	93	'id': 'ArtofZod_2017.12.12.00.13.23.flv',
d6166a76	94	'ext': 'mp4',
a42839e5 S	95	'title': 'ArtofZod_2017.12.12.00.13.23.flv',
	96	'thumbnail': r're:^https?://.*\.jpg'
	97	},
	98	}, {
	99	'url': 'https://picarto.tv/videopopout/Plague',
	100	'only_matching': True,
	101	}]
d6166a76 PG	102
	103	def _real_extract(self, url):
	104	video_id = self._match_id(url)
a42839e5	105
d6166a76 PG	106	webpage = self._download_webpage(url, video_id)
d6166a76 PG	107
a42839e5 S	108	vod_info = self._parse_json(
	109	self._search_regex(
	110	r'(?s)#vod-player["\']\s*,\s*(\{.+?\})\s*\)', webpage,
a4211baf	111	'vod player'),
a42839e5 S	112	video_id, transform_source=js_to_json)
	113
	114	formats = self._extract_m3u8_formats(
	115	vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native',
	116	m3u8_id='hls')
	117	self._sort_formats(formats)
d6166a76 PG	118
	119	return {
	120	'id': video_id,
	121	'title': video_id,
d6166a76	122	'thumbnail': vod_info.get('vodThumb'),
a42839e5	123	'formats': formats,
d6166a76	124	}