[yt-dlp.git] / yt_dlp / extractor / vidio.py

from .common import InfoExtractor
from ..utils import (
    clean_html,
    ExtractorError,
    format_field,
    get_element_by_class,
    int_or_none,
    parse_iso8601,
    smuggle_url,
    str_or_none,
    strip_or_none,
    try_get,
    unsmuggle_url,
    urlencode_postdata,
)


class VidioBaseIE(InfoExtractor):
    # Common base for the vidio.com extractors: account login, API key
    # retrieval and API requests carrying that key.
    _LOGIN_URL = 'https://www.vidio.com/users/login'
    _NETRC_MACHINE = 'vidio'

    def _perform_login(self, username, password):
        """Log in to vidio.com with the given username and password.

        Returns immediately when interactions.json already reports a
        current user.

        Raises:
            ExtractorError: when the login request is answered with HTTP 401.
        """
        def is_logged_in():
            # fatal=False: a failed download yields a falsy value, hence `or {}`
            res = self._download_json(
                'https://www.vidio.com/interactions.json', None, 'Checking if logged in', fatal=False) or {}
            return bool(res.get('current_user'))

        if is_logged_in():
            return

        login_page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading log in page')

        # Start from the hidden inputs of the login form and add the credentials
        login_form = self._form_hidden_inputs("login-form", login_page)
        login_form.update({
            'user[login]': username,
            'user[password]': password,
        })
        # 302 and 401 are declared as expected so the response can be inspected here
        login_post, login_post_urlh = self._download_webpage_handle(
            self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(login_form), expected_status=[302, 401])

        if login_post_urlh.status == 401:
            if get_element_by_class('onboarding-content-register-popup__title', login_post):
                raise ExtractorError(
                    'Unable to log in: The provided email has not registered yet.', expected=True)

            reason = (
                get_element_by_class('onboarding-form__general-error', login_post)
                or get_element_by_class('onboarding-modal__title', login_post))
            # Neither element may be present, leaving `reason` as None. Guard
            # the substring test so the generic error below is raised instead
            # of a TypeError.
            if reason and 'Akun terhubung ke' in reason:
                raise ExtractorError(
                    'Unable to log in: Your account is linked to a social media account. '
                    'Use --cookies to provide account credentials instead', expected=True)
            elif reason:
                subreason = get_element_by_class('onboarding-modal__description-text', login_post) or ''
                raise ExtractorError(
                    'Unable to log in: %s. %s' % (reason, clean_html(subreason)), expected=True)
            raise ExtractorError('Unable to log in')

    def _initialize_pre_login(self):
        """Request https://www.vidio.com/auth (with an empty body) and keep its api_key."""
        self._api_key = self._download_json(
            'https://www.vidio.com/auth', None, data=b'')['api_key']

    def _call_api(self, url, video_id, note=None):
        """Download JSON from `url`, sending the stored API key as X-API-KEY.

        `video_id` and `note` are passed through to _download_json.
        """
        return self._download_json(url, video_id, note=note, headers={
            'Content-Type': 'application/vnd.api+json',
            'X-API-KEY': self._api_key,
        })


class VidioIE(VidioBaseIE):
    # Extractor for single videos at vidio.com/watch/<id>-<display_id>
    _VALID_URL = r'https?://(?:www\.)?vidio\.com/watch/(?P<id>\d+)-(?P<display_id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015',
        'md5': 'cd2801394afc164e9775db6a140b91fe',
        'info_dict': {
            'id': '165683',
            'display_id': 'dj_ambred-booyah-live-2015',
            'ext': 'mp4',
            'title': 'DJ_AMBRED - Booyah (Live 2015)',
            'description': 'md5:27dc15f819b6a78a626490881adbadf8',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 149,
            'like_count': int,
            'uploader': 'TWELVE Pic',
            'timestamp': 1444902800,
            'upload_date': '20151015',
            'uploader_id': 'twelvepictures',
            'channel': 'Cover Music Video',
            'channel_id': '280236',
            'view_count': int,
            'dislike_count': int,
            'comment_count': int,
            'tags': 'count:4',
        },
    }, {
        'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north',
        'only_matching': True,
    }, {
        # Premier-exclusive video
        'url': 'https://www.vidio.com/watch/1550718-stand-by-me-doraemon',
        'only_matching': True
    }]

    def _real_extract(self, url):
        """Build the info dict for a single video.

        Premium videos take their manifests from interactions_stream.json;
        all other videos use the `hls_url` of the first clip in the API
        response. Login is requested when a premium video exposes neither
        an HLS nor a DASH source.
        """
        match = self._match_valid_url(url).groupdict()
        video_id, display_id = match.get('id'), match.get('display_id')
        data = self._call_api('https://api.vidio.com/videos/' + video_id, display_id)
        video = data['videos'][0]
        title = video['title'].strip()
        is_premium = video.get('is_premium')

        if is_premium:
            sources = self._download_json(
                'https://www.vidio.com/interactions_stream.json?video_id=%s&type=videos' % video_id,
                display_id, note='Downloading premier API JSON')
            if not (sources.get('source') or sources.get('source_dash')):
                self.raise_login_required('This video is only available for registered users with the appropriate subscription')

            formats, subs = [], {}
            if sources.get('source'):
                hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles(
                    sources['source'], display_id, 'mp4', 'm3u8_native')
                formats.extend(hls_formats)
                # Merge language by language; dict.update() would replace the
                # whole track list of a language that is already present.
                self._merge_subtitles(hls_subs, target=subs)
            if sources.get('source_dash'):  # TODO: Find video example with source_dash
                dash_formats, dash_subs = self._extract_mpd_formats_and_subtitles(
                    sources['source_dash'], display_id, 'dash')
                formats.extend(dash_formats)
                self._merge_subtitles(dash_subs, target=subs)
        else:
            hls_url = data['clips'][0]['hls_url']
            formats, subs = self._extract_m3u8_formats_and_subtitles(
                hls_url, display_id, 'mp4', 'm3u8_native')

        self._sort_formats(formats)

        # First element of data['<name>s'] if it is a dict, otherwise {}
        get_first = lambda x: try_get(data, lambda y: y[x + 's'][0], dict) or {}
        channel = get_first('channel')
        user = get_first('user')
        username = user.get('username')
        # Counters are read from the video's 'total_<name>' fields
        get_count = lambda x: int_or_none(video.get('total_' + x))

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': strip_or_none(video.get('description')),
            'thumbnail': video.get('image_url_medium'),
            'duration': int_or_none(video.get('duration')),
            'like_count': get_count('likes'),
            'formats': formats,
            'subtitles': subs,
            'uploader': user.get('name'),
            'timestamp': parse_iso8601(video.get('created_at')),
            'uploader_id': username,
            'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'),
            'channel': channel.get('name'),
            'channel_id': str_or_none(channel.get('id')),
            'view_count': get_count('view_count'),
            'dislike_count': get_count('dislikes'),
            'comment_count': get_count('comments'),
            'tags': video.get('tag_list'),
        }


class VidioPremierIE(VidioBaseIE):
    # Extractor for vidio.com/premier/<id>/<display_id> pages, which are
    # returned as a playlist of playlists (one per API playlist entry).
    _VALID_URL = r'https?://(?:www\.)?vidio\.com/premier/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.vidio.com/premier/2885/badai-pasti-berlalu',
        'playlist_mincount': 14,
    }, {
        # Series with both free and premier-exclusive videos
        'url': 'https://www.vidio.com/premier/2567/sosmed',
        'only_matching': True,
    }]

    def _playlist_entries(self, playlist_url, display_id):
        """Yield a url result per video, following `links.next` page by page."""
        page_url, page_num = playlist_url, 0
        while page_url:
            page_num += 1
            page = self._call_api(
                page_url, display_id, 'Downloading API JSON page %s' % page_num)
            for item in page.get('data', []):
                yield self.url_result(item['links']['watchpage'], 'Vidio', item['id'])
            # A missing next link ends the loop
            page_url = try_get(page, lambda x: x['links']['next'])

    def _real_extract(self, url):
        """Return either one smuggled playlist or the list of all playlists.

        When the URL carries smuggled data with an API URL, only that
        playlist is downloaded. Otherwise every playlist of the content
        profile is emitted as this same page URL with the playlist's API
        URL, id and title smuggled in.
        """
        url, smuggled = unsmuggle_url(url, {})
        playlist_id, display_id = self._match_valid_url(url).groups()

        api_url = smuggled.get('url')
        if not api_url:
            def smuggle_playlist(entry):
                # Attach what the smuggled branch below needs to this page's URL
                return smuggle_url(url, {
                    'url': entry['relationships']['videos']['links']['related'],
                    'id': entry['id'],
                    'title': try_get(entry, lambda x: x['attributes']['name'])
                })

            playlist_data = self._call_api(
                'https://api.vidio.com/content_profiles/%s/playlists' % playlist_id, display_id)
            return self.playlist_from_matches(
                playlist_data.get('data', []), playlist_id=playlist_id,
                ie=self.ie_key(), getter=smuggle_playlist)

        # Smuggled data contains an API URL. Download only that playlist
        playlist_id = smuggled['id']
        entries = self._playlist_entries(api_url, playlist_id)
        return self.playlist_result(
            entries, playlist_id=playlist_id, playlist_title=smuggled.get('title'))


class VidioLiveIE(VidioBaseIE):
    # Extractor for livestreams at vidio.com/live/<id>-<display_id>
    _VALID_URL = r'https?://(?:www\.)?vidio\.com/live/(?P<id>\d+)-(?P<display_id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.vidio.com/live/204-sctv',
        'info_dict': {
            'id': '204',
            'title': 'SCTV',
            'uploader': 'SCTV',
            'uploader_id': 'sctv',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        # Premier-exclusive livestream
        'url': 'https://www.vidio.com/live/6362-tvn',
        'only_matching': True,
    }, {
        # DRM premier-exclusive livestream
        'url': 'https://www.vidio.com/live/6299-bein-1',
        'only_matching': True,
    }]

    def _extract_hls_formats_with_token(self, hls_url, video_id, display_id):
        """Fetch the livestream's token and extract HLS formats from `hls_url`.

        The token returned by /live/<video_id>/tokens is appended to
        `hls_url` as its query string (empty when the response has no token).
        """
        token_json = self._download_json(
            'https://www.vidio.com/live/%s/tokens' % video_id,
            display_id, note='Downloading HLS token JSON', data=b'')
        return self._extract_m3u8_formats(
            hls_url + '?' + token_json.get('token', ''), display_id, 'mp4', 'm3u8_native')

    def _real_extract(self, url):
        """Build the info dict for a livestream.

        Premium streams take their HLS source from interactions_stream.json;
        other streams use the URLs in the livestream metadata. DRM streams
        are reported unless `allow_unplayable_formats` is set.
        """
        video_id, display_id = self._match_valid_url(url).groups()
        stream_data = self._call_api(
            'https://www.vidio.com/api/livestreamings/%s/detail' % video_id, display_id)
        stream_meta = stream_data['livestreamings'][0]
        # Tolerate a missing, null or empty 'users' list instead of raising
        user = try_get(stream_data, lambda x: x['users'][0], dict) or {}

        title = stream_meta.get('title')
        username = user.get('username')

        formats = []
        if stream_meta.get('is_drm') and not self.get_param('allow_unplayable_formats'):
            self.report_drm(video_id)
        if stream_meta.get('is_premium'):
            sources = self._download_json(
                'https://www.vidio.com/interactions_stream.json?video_id=%s&type=livestreamings' % video_id,
                display_id, note='Downloading premier API JSON')
            if not (sources.get('source') or sources.get('source_dash')):
                self.raise_login_required('This video is only available for registered users with the appropriate subscription')

            if str_or_none(sources.get('source')):
                formats.extend(self._extract_hls_formats_with_token(
                    sources['source'], video_id, display_id))
            # TODO: sources['source_dash'] (DASH) is not extracted yet
        else:
            if stream_meta.get('stream_token_url'):
                formats.extend(self._extract_hls_formats_with_token(
                    stream_meta['stream_token_url'], video_id, display_id))
            # TODO: stream_meta['stream_dash_url'] (DASH) is not extracted yet
            if stream_meta.get('stream_url'):
                formats.extend(self._extract_m3u8_formats(
                    stream_meta['stream_url'], display_id, 'mp4', 'm3u8_native'))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'is_live': True,
            'description': strip_or_none(stream_meta.get('description')),
            'thumbnail': stream_meta.get('image'),
            'like_count': int_or_none(stream_meta.get('like')),
            'dislike_count': int_or_none(stream_meta.get('dislike')),
            'formats': formats,
            'uploader': user.get('name'),
            'timestamp': parse_iso8601(stream_meta.get('start_time')),
            'uploader_id': username,
            'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'),
        }
Commit	Line	Data
0fc832e1	1	from .common import InfoExtractor
2181983a	2	from ..utils import (
11cc4571	3	clean_html,
10bb7e51	4	ExtractorError,
e0ddbd02	5	format_field,
10bb7e51	6	get_element_by_class,
2181983a	7	int_or_none,
2181983a	8	parse_iso8601,
f2cd7060	9	smuggle_url,
2181983a	10	str_or_none,
	11	strip_or_none,
	12	try_get,
f2cd7060	13	unsmuggle_url,
10bb7e51	14	urlencode_postdata,
2181983a	15	)
7def3571 T	16
7def3571 T	17
f2cd7060	18	class VidioBaseIE(InfoExtractor):
10bb7e51 M	19	_LOGIN_URL = 'https://www.vidio.com/users/login'
	20	_NETRC_MACHINE = 'vidio'
	21
52efa4b3	22	def _perform_login(self, username, password):
10bb7e51 M	23	def is_logged_in():
	24	res = self._download_json(
	25	'https://www.vidio.com/interactions.json', None, 'Checking if logged in', fatal=False) or {}
	26	return bool(res.get('current_user'))
	27
	28	if is_logged_in():
	29	return
	30
	31	login_page = self._download_webpage(
	32	self._LOGIN_URL, None, 'Downloading log in page')
	33
	34	login_form = self._form_hidden_inputs("login-form", login_page)
	35	login_form.update({
	36	'user[login]': username,
	37	'user[password]': password,
	38	})
	39	login_post, login_post_urlh = self._download_webpage_handle(
	40	self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(login_form), expected_status=[302, 401])
	41
	42	if login_post_urlh.status == 401:
11cc4571	43	if get_element_by_class('onboarding-content-register-popup__title', login_post):
10bb7e51	44	raise ExtractorError(
11cc4571 M	45	'Unable to log in: The provided email has not registered yet.', expected=True)
	46
	47	reason = get_element_by_class('onboarding-form__general-error', login_post) or get_element_by_class('onboarding-modal__title', login_post)
	48	if 'Akun terhubung ke' in reason:
	49	raise ExtractorError(
	50	'Unable to log in: Your account is linked to a social media account. '
	51	'Use --cookies to provide account credentials instead', expected=True)
	52	elif reason:
	53	subreason = get_element_by_class('onboarding-modal__description-text', login_post) or ''
	54	raise ExtractorError(
	55	'Unable to log in: %s. %s' % (reason, clean_html(subreason)), expected=True)
10bb7e51	56	raise ExtractorError('Unable to log in')
7def3571	57
52efa4b3	58	def _initialize_pre_login(self):
2181983a	59	self._api_key = self._download_json(
2181983a	60	'https://www.vidio.com/auth', None, data=b'')['api_key']
0fc832e1	61
f2cd7060 M	62	def _call_api(self, url, video_id, note=None):
	63	return self._download_json(url, video_id, note=note, headers={
	64	'Content-Type': 'application/vnd.api+json',
	65	'X-API-KEY': self._api_key,
	66	})
	67
	68
	69	class VidioIE(VidioBaseIE):
	70	_VALID_URL = r'https?://(?:www\.)?vidio\.com/watch/(?P<id>\d+)-(?P<display_id>[^/?#&]+)'
	71	_TESTS = [{
	72	'url': 'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015',
	73	'md5': 'cd2801394afc164e9775db6a140b91fe',
	74	'info_dict': {
	75	'id': '165683',
	76	'display_id': 'dj_ambred-booyah-live-2015',
	77	'ext': 'mp4',
	78	'title': 'DJ_AMBRED - Booyah (Live 2015)',
	79	'description': 'md5:27dc15f819b6a78a626490881adbadf8',
	80	'thumbnail': r're:^https?://.*\.jpg$',
	81	'duration': 149,
	82	'like_count': int,
	83	'uploader': 'TWELVE Pic',
	84	'timestamp': 1444902800,
	85	'upload_date': '20151015',
	86	'uploader_id': 'twelvepictures',
	87	'channel': 'Cover Music Video',
	88	'channel_id': '280236',
	89	'view_count': int,
	90	'dislike_count': int,
	91	'comment_count': int,
	92	'tags': 'count:4',
	93	},
	94	}, {
	95	'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north',
	96	'only_matching': True,
	97	}, {
	98	# Premier-exclusive video
	99	'url': 'https://www.vidio.com/watch/1550718-stand-by-me-doraemon',
	100	'only_matching': True
	101	}]
	102
2181983a	103	def _real_extract(self, url):
5ad28e7f	104	match = self._match_valid_url(url).groupdict()
f2cd7060 M	105	video_id, display_id = match.get('id'), match.get('display_id')
f2cd7060 M	106	data = self._call_api('https://api.vidio.com/videos/' + video_id, display_id)
2181983a	107	video = data['videos'][0]
2181983a	108	title = video['title'].strip()
46c43ffc	109	is_premium = video.get('is_premium')
f2cd7060	110
46c43ffc M	111	if is_premium:
	112	sources = self._download_json(
	113	'https://www.vidio.com/interactions_stream.json?video_id=%s&type=videos' % video_id,
	114	display_id, note='Downloading premier API JSON')
	115	if not (sources.get('source') or sources.get('source_dash')):
f2cd7060	116	self.raise_login_required('This video is only available for registered users with the appropriate subscription')
46c43ffc M	117
	118	formats, subs = [], {}
	119	if sources.get('source'):
	120	hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles(
	121	sources['source'], display_id, 'mp4', 'm3u8_native')
	122	formats.extend(hls_formats)
	123	subs.update(hls_subs)
	124	if sources.get('source_dash'): # TODO: Find video example with source_dash
	125	dash_formats, dash_subs = self._extract_mpd_formats_and_subtitles(
	126	sources['source_dash'], display_id, 'dash')
	127	formats.extend(dash_formats)
	128	subs.update(dash_subs)
	129	else:
	130	hls_url = data['clips'][0]['hls_url']
	131	formats, subs = self._extract_m3u8_formats_and_subtitles(
	132	hls_url, display_id, 'mp4', 'm3u8_native')
0fc832e1	133
07ad0cf3	134	self._sort_formats(formats)
0fc832e1	135
2181983a	136	get_first = lambda x: try_get(data, lambda y: y[x + 's'][0], dict) or {}
	137	channel = get_first('channel')
	138	user = get_first('user')
	139	username = user.get('username')
	140	get_count = lambda x: int_or_none(video.get('total_' + x))
7def3571 T	141
	142	return {
	143	'id': video_id,
0fc832e1 S	144	'display_id': display_id,
0fc832e1 S	145	'title': title,
2181983a	146	'description': strip_or_none(video.get('description')),
	147	'thumbnail': video.get('image_url_medium'),
	148	'duration': int_or_none(video.get('duration')),
	149	'like_count': get_count('likes'),
0fc832e1	150	'formats': formats,
46c43ffc	151	'subtitles': subs,
2181983a	152	'uploader': user.get('name'),
	153	'timestamp': parse_iso8601(video.get('created_at')),
	154	'uploader_id': username,
a70635b8	155	'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'),
2181983a	156	'channel': channel.get('name'),
	157	'channel_id': str_or_none(channel.get('id')),
	158	'view_count': get_count('view_count'),
	159	'dislike_count': get_count('dislikes'),
	160	'comment_count': get_count('comments'),
	161	'tags': video.get('tag_list'),
7def3571	162	}
f2cd7060 M	163
	164
	165	class VidioPremierIE(VidioBaseIE):
	166	_VALID_URL = r'https?://(?:www\.)?vidio\.com/premier/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
	167	_TESTS = [{
	168	'url': 'https://www.vidio.com/premier/2885/badai-pasti-berlalu',
	169	'playlist_mincount': 14,
	170	}, {
	171	# Series with both free and premier-exclusive videos
	172	'url': 'https://www.vidio.com/premier/2567/sosmed',
	173	'only_matching': True,
	174	}]
	175
	176	def _playlist_entries(self, playlist_url, display_id):
	177	index = 1
	178	while playlist_url:
	179	playlist_json = self._call_api(playlist_url, display_id, 'Downloading API JSON page %s' % index)
	180	for video_json in playlist_json.get('data', []):
	181	link = video_json['links']['watchpage']
	182	yield self.url_result(link, 'Vidio', video_json['id'])
	183	playlist_url = try_get(playlist_json, lambda x: x['links']['next'])
	184	index += 1
	185
	186	def _real_extract(self, url):
	187	url, idata = unsmuggle_url(url, {})
5ad28e7f	188	playlist_id, display_id = self._match_valid_url(url).groups()
f2cd7060 M	189
	190	playlist_url = idata.get('url')
	191	if playlist_url: # Smuggled data contains an API URL. Download only that playlist
	192	playlist_id = idata['id']
	193	return self.playlist_result(
	194	self._playlist_entries(playlist_url, playlist_id),
	195	playlist_id=playlist_id, playlist_title=idata.get('title'))
	196
	197	playlist_data = self._call_api('https://api.vidio.com/content_profiles/%s/playlists' % playlist_id, display_id)
	198
	199	return self.playlist_from_matches(
	200	playlist_data.get('data', []), playlist_id=playlist_id, ie=self.ie_key(),
	201	getter=lambda data: smuggle_url(url, {
	202	'url': data['relationships']['videos']['links']['related'],
	203	'id': data['id'],
	204	'title': try_get(data, lambda x: x['attributes']['name'])
	205	}))
	206
	207
	208	class VidioLiveIE(VidioBaseIE):
	209	_VALID_URL = r'https?://(?:www\.)?vidio\.com/live/(?P<id>\d+)-(?P<display_id>[^/?#&]+)'
	210	_TESTS = [{
	211	'url': 'https://www.vidio.com/live/204-sctv',
	212	'info_dict': {
	213	'id': '204',
	214	'title': 'SCTV',
	215	'uploader': 'SCTV',
	216	'uploader_id': 'sctv',
	217	'thumbnail': r're:^https?://.*\.jpg$',
	218	},
	219	}, {
	220	# Premier-exclusive livestream
	221	'url': 'https://www.vidio.com/live/6362-tvn',
	222	'only_matching': True,
	223	}, {
	224	# DRM premier-exclusive livestream
	225	'url': 'https://www.vidio.com/live/6299-bein-1',
	226	'only_matching': True,
	227	}]
	228
	229	def _real_extract(self, url):
5ad28e7f	230	video_id, display_id = self._match_valid_url(url).groups()
f2cd7060 M	231	stream_data = self._call_api(
	232	'https://www.vidio.com/api/livestreamings/%s/detail' % video_id, display_id)
	233	stream_meta = stream_data['livestreamings'][0]
	234	user = stream_data.get('users', [{}])[0]
	235
	236	title = stream_meta.get('title')
	237	username = user.get('username')
	238
	239	formats = []
	240	if stream_meta.get('is_drm'):
	241	if not self.get_param('allow_unplayable_formats'):
88acdbc2	242	self.report_drm(video_id)
f2cd7060 M	243	if stream_meta.get('is_premium'):
	244	sources = self._download_json(
	245	'https://www.vidio.com/interactions_stream.json?video_id=%s&type=livestreamings' % video_id,
	246	display_id, note='Downloading premier API JSON')
	247	if not (sources.get('source') or sources.get('source_dash')):
	248	self.raise_login_required('This video is only available for registered users with the appropriate subscription')
	249
	250	if str_or_none(sources.get('source')):
	251	token_json = self._download_json(
	252	'https://www.vidio.com/live/%s/tokens' % video_id,
	253	display_id, note='Downloading HLS token JSON', data=b'')
	254	formats.extend(self._extract_m3u8_formats(
	255	sources['source'] + '?' + token_json.get('token', ''), display_id, 'mp4', 'm3u8_native'))
	256	if str_or_none(sources.get('source_dash')):
	257	pass
	258	else:
	259	if stream_meta.get('stream_token_url'):
	260	token_json = self._download_json(
	261	'https://www.vidio.com/live/%s/tokens' % video_id,
	262	display_id, note='Downloading HLS token JSON', data=b'')
	263	formats.extend(self._extract_m3u8_formats(
	264	stream_meta['stream_token_url'] + '?' + token_json.get('token', ''),
	265	display_id, 'mp4', 'm3u8_native'))
	266	if stream_meta.get('stream_dash_url'):
	267	pass
	268	if stream_meta.get('stream_url'):
	269	formats.extend(self._extract_m3u8_formats(
	270	stream_meta['stream_url'], display_id, 'mp4', 'm3u8_native'))
	271	self._sort_formats(formats)
	272
	273	return {
	274	'id': video_id,
	275	'display_id': display_id,
	276	'title': title,
	277	'is_live': True,
	278	'description': strip_or_none(stream_meta.get('description')),
	279	'thumbnail': stream_meta.get('image'),
	280	'like_count': int_or_none(stream_meta.get('like')),
	281	'dislike_count': int_or_none(stream_meta.get('dislike')),
	282	'formats': formats,
	283	'uploader': user.get('name'),
	284	'timestamp': parse_iso8601(stream_meta.get('start_time')),
	285	'uploader_id': username,
a70635b8	286	'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'),
f2cd7060	287	}