[yt-dlp.git] / youtube_dl / extractor / viu.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    ExtractorError,
    int_or_none,
)


class ViuBaseIE(InfoExtractor):
    """Common base for the extractors that talk to https://www.viu.com/api/.

    A guest auth token is requested in _real_initialize() and attached to
    every request issued through _call_api().
    """

    def _real_initialize(self):
        """Request a guest session and keep its token in self._auth_token.

        Raises:
            ExtractorError: if the authenticate response carries no
                X-VIU-AUTH header.
        """
        viu_auth_res = self._request_webpage(
            'https://www.viu.com/api/apps/v2/authenticate', None,
            'Requesting Viu auth', query={
                'acct': 'test',
                'appid': 'viu_desktop',
                'fmt': 'json',
                'iid': 'guest',
                'languageid': 'default',
                'platform': 'desktop',
                'userid': 'guest',
                'useridtype': 'guest',
                'ver': '1.0'
            })
        # The token is read from a response header, not from the body.
        # Use .get() so a missing header produces a readable extractor error
        # instead of a bare KeyError.
        auth_token = viu_auth_res.info().get('X-VIU-AUTH')
        if not auth_token:
            raise ExtractorError(
                'Unable to obtain Viu auth token (no X-VIU-AUTH header)')
        self._auth_token = auth_token

    def _call_api(self, path, *args, **kwargs):
        """Download JSON from the Viu API and return its 'response' object.

        Args:
            path: API path appended to 'https://www.viu.com/api/'.
            *args, **kwargs: forwarded unchanged to self._download_json().
                A 'headers' keyword, if given, is merged on top of the
                headers built here, so caller-supplied values win.

        Returns:
            The value stored under the 'response' key of the downloaded JSON.

        Raises:
            ExtractorError: (expected) if the response 'status' is anything
                other than 'success'.
        """
        headers = self.geo_verification_headers()
        headers['X-VIU-AUTH'] = self._auth_token
        # `or {}` also tolerates an explicit headers=None from the caller.
        headers.update(kwargs.get('headers') or {})
        kwargs['headers'] = headers
        response = self._download_json(
            'https://www.viu.com/api/' + path, *args, **kwargs)['response']
        if response.get('status') != 'success':
            # A failure reply without 'message' must still surface as an
            # ExtractorError rather than a KeyError raised while reporting it.
            raise ExtractorError('%s said: %s' % (
                self.IE_NAME, response.get('message') or 'unknown error'),
                expected=True)
        return response


class ViuIE(ViuBaseIE):
    """Extractor for a single Viu clip.

    Matches either the internal 'viu:<id>' form or a
    'https://www.viu.com/<xx>/media/<id>' page URL.
    """

    _VALID_URL = r'(?:viu:|https?://www\.viu\.com/[a-z]{2}/media/)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.viu.com/en/media/1116705532?containerId=playlist-22168059',
        'info_dict': {
            'id': '1116705532',
            'ext': 'mp4',
            'title': 'Citizen Khan - Ep 1',
            'description': 'md5:d7ea1604f49e5ba79c212c551ce2110e',
        },
        'params': {
            'skip_download': 'm3u8 download',
        },
        'skip': 'Geo-restricted to India',
    }, {
        'url': 'https://www.viu.com/en/media/1130599965',
        'info_dict': {
            'id': '1130599965',
            'ext': 'mp4',
            'title': 'Jealousy Incarnate - Episode 1',
            'description': 'md5:d3d82375cab969415d2720b6894361e9',
        },
        'params': {
            'skip_download': 'm3u8 download',
        },
        'skip': 'Geo-restricted to Indonesia',
    }]

    def _real_extract(self, url):
        """Build the info dict for the clip whose id is matched in *url*."""
        video_id = self._match_id(url)

        clip = self._call_api(
            'clip/load', video_id, 'Downloading video data',
            query={
                'appid': 'viu_desktop',
                'fmt': 'json',
                'id': video_id
            })['item'][0]
        title = clip['title']

        # The manifest URL is assembled from three metadata fields when all
        # of them are present; otherwise it is derived from 'href' by
        # rewriting the '/hlsc_<letters><digits>.m3u8' part to use 'whe'.
        manifest_parts = (
            clip.get('urlpathd') or clip.get('urlpath'),
            clip.get('tdirforwhole'),
            clip.get('hlsfile'),
        )
        if all(manifest_parts):
            m3u8_url = '%s/%s/%s' % manifest_parts
        else:
            m3u8_url = re.sub(
                r'(/hlsc_)[a-z]+(\d+\.m3u8)',
                r'\1whe\2', clip['href'])

        formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
        self._sort_formats(formats)

        # Subtitle tracks are exposed as top-level keys named
        # 'subtitle_<lang>_<vtt|srt>' whose value is the track URL.
        subtitle_key = re.compile(
            r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))')
        subtitles = {}
        for field, field_value in clip.items():
            found = subtitle_key.match(field)
            if found:
                tracks = subtitles.setdefault(found.group('lang'), [])
                tracks.append({
                    'url': field_value,
                    'ext': found.group('ext')
                })

        info = {
            'id': video_id,
            'title': title,
            'description': clip.get('description'),
            'series': clip.get('moviealbumshowname'),
            'episode': title,
            'episode_number': int_or_none(clip.get('episodeno')),
            'duration': int_or_none(clip.get('duration')),
            'formats': formats,
            'subtitles': subtitles,
        }
        return info


class ViuPlaylistIE(ViuBaseIE):
    """Extractor for Viu playlist listing pages (.../listing/playlist-<id>)."""

    IE_NAME = 'viu:playlist'
    _VALID_URL = r'https?://www\.viu\.com/[^/]+/listing/playlist-(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.viu.com/en/listing/playlist-22461380',
        'info_dict': {
            'id': '22461380',
            'title': 'The Good Wife',
        },
        'playlist_count': 16,
        'skip': 'Geo-restricted to Indonesia',
    }

    def _real_extract(self, url):
        """Return a playlist result with one 'viu:<id>' entry per item."""
        playlist_id = self._match_id(url)

        api_query = {
            'appid': 'viu_desktop',
            'fmt': 'json',
            'id': 'playlist-' + playlist_id
        }
        container = self._call_api(
            'container/load', playlist_id,
            'Downloading playlist info', query=api_query)['container']

        # Items without a usable id are skipped; the remaining ids are
        # stringified before being turned into 'viu:' URLs.
        clip_ids = (
            compat_str(raw_id)
            for raw_id in (
                item.get('id') for item in container.get('item', []))
            if raw_id)
        entries = [
            self.url_result('viu:' + clip_id, 'Viu', clip_id)
            for clip_id in clip_ids]

        playlist_title = container.get('title')
        return self.playlist_result(entries, playlist_id, playlist_title)


class ViuOTTIE(InfoExtractor):
    """Extractor for Viu OTT pages (viu.com/ott/<country>/<locale>/vod/<id>).

    Unlike the other Viu extractors in this file it does not go through
    ViuBaseIE; it queries the per-country 'index.php' endpoint and a
    separate stream-distribution endpoint directly.
    """

    IE_NAME = 'viu:ott'
    _VALID_URL = r'https?://(?:www\.)?viu\.com/ott/(?P<country_code>[a-z]{2})/[a-z]{2}-[a-z]{2}/vod/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.viu.com/ott/sg/en-us/vod/3421/The%20Prime%20Minister%20and%20I',
        'info_dict': {
            'id': '3421',
            'ext': 'mp4',
            'title': 'A New Beginning',
            'description': 'md5:1e7486a619b6399b25ba6a41c0fe5b2c',
        },
        'params': {
            'skip_download': 'm3u8 download',
        },
        'skip': 'Geo-restricted to Singapore',
    }, {
        'url': 'http://www.viu.com/ott/hk/zh-hk/vod/7123/%E5%A4%A7%E4%BA%BA%E5%A5%B3%E5%AD%90',
        'info_dict': {
            'id': '7123',
            'ext': 'mp4',
            'title': '這就是我的生活之道',
            'description': 'md5:4eb0d8b08cf04fcdc6bbbeb16043434f',
        },
        'params': {
            'skip_download': 'm3u8 download',
        },
        'skip': 'Geo-restricted to Hong Kong',
    }]

    def _real_extract(self, url):
        """Build the info dict for the OTT product matched in *url*.

        Raises:
            ExtractorError: (expected) if the detail response contains no
                'current_product'.
        """
        country_code, video_id = re.match(self._VALID_URL, url).groups()

        product_data = self._download_json(
            'http://www.viu.com/ott/%s/index.php' % country_code, video_id,
            'Downloading video info', query={
                'r': 'vod/ajax-detail',
                'platform_flag_label': 'web',
                'product_id': video_id,
            })['data']

        video_data = product_data.get('current_product')
        if not video_data:
            raise ExtractorError('This video is not available in your region.', expected=True)

        stream_data = self._download_json(
            'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
            video_id, 'Downloading stream info', query={
                'ccs_product_id': video_data['ccs_product_id'],
            })['data']['stream']

        # `x.get(key) or default` is used throughout instead of
        # `x.get(key, default)`: the latter only covers a missing key, while
        # a key that is present with a null value would still yield None and
        # break the subsequent .items()/.get()/iteration.
        stream_sizes = stream_data.get('size') or {}
        formats = []
        for vid_format, stream_url in (stream_data.get('url') or {}).items():
            # A format without a URL cannot be downloaded; skip it, the same
            # way subtitle entries without a URL are skipped below.
            if not stream_url:
                continue
            # Format ids are expected to look like 's<height>p'; height
            # stays None when the id does not match.
            height = int_or_none(self._search_regex(
                r's(\d+)p', vid_format, 'height', default=None))
            formats.append({
                'format_id': vid_format,
                'url': stream_url,
                'height': height,
                'ext': 'mp4',
                'filesize': int_or_none(stream_sizes.get(vid_format))
            })
        self._sort_formats(formats)

        subtitles = {}
        for sub in video_data.get('subtitle') or []:
            sub_url = sub.get('url')
            if not sub_url:
                continue
            subtitles.setdefault(sub.get('name'), []).append({
                'url': sub_url,
                'ext': 'srt',
            })

        # The per-episode title is taken from the 'synopsis' field.
        title = video_data['synopsis'].strip()

        return {
            'id': video_id,
            'title': title,
            'description': video_data.get('description'),
            'series': (product_data.get('series') or {}).get('name'),
            'episode': title,
            'episode_number': int_or_none(video_data.get('number')),
            'duration': int_or_none(stream_data.get('duration')),
            'thumbnail': video_data.get('cover_image_url'),
            'formats': formats,
            'subtitles': subtitles,
        }
Commit	Line	Data
e7b6caef	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import re
	5
	6	from .common import InfoExtractor
72310315	7	from ..compat import compat_str
e7b6caef	8	from ..utils import (
	9	ExtractorError,
	10	int_or_none,
e7b6caef	11	)
	12
	13
	14	class ViuBaseIE(InfoExtractor):
72310315	15	def _real_initialize(self):
e7b6caef	16	viu_auth_res = self._request_webpage(
72310315 RA	17	'https://www.viu.com/api/apps/v2/authenticate', None,
	18	'Requesting Viu auth', query={
	19	'acct': 'test',
	20	'appid': 'viu_desktop',
	21	'fmt': 'json',
	22	'iid': 'guest',
	23	'languageid': 'default',
	24	'platform': 'desktop',
	25	'userid': 'guest',
	26	'useridtype': 'guest',
	27	'ver': '1.0'
e7b6caef	28	})
72310315 RA	29	self._auth_token = viu_auth_res.info()['X-VIU-AUTH']
	30
	31	def _call_api(self, path, *args, **kwargs):
	32	headers = self.geo_verification_headers()
	33	headers.update({
	34	'X-VIU-AUTH': self._auth_token
	35	})
	36	headers.update(kwargs.get('headers', {}))
	37	kwargs['headers'] = headers
	38	response = self._download_json(
	39	'https://www.viu.com/api/' + path, *args, **kwargs)['response']
	40	if response.get('status') != 'success':
	41	raise ExtractorError('%s said: %s' % (
	42	self.IE_NAME, response['message']), expected=True)
	43	return response
e7b6caef	44
	45
	46	class ViuIE(ViuBaseIE):
72310315	47	_VALID_URL = r'(?:viu:|https?://www\.viu\.com/[a-z]{2}/media/)(?P<id>\d+)'
e7b6caef	48	_TESTS = [{
e7b6caef	49	'url': 'https://www.viu.com/en/media/1116705532?containerId=playlist-22168059',
	50	'info_dict': {
	51	'id': '1116705532',
	52	'ext': 'mp4',
72310315	53	'title': 'Citizen Khan - Ep 1',
e7b6caef	54	'description': 'md5:d7ea1604f49e5ba79c212c551ce2110e',
	55	},
	56	'params': {
	57	'skip_download': 'm3u8 download',
	58	},
	59	'skip': 'Geo-restricted to India',
	60	}, {
	61	'url': 'https://www.viu.com/en/media/1130599965',
	62	'info_dict': {
	63	'id': '1130599965',
	64	'ext': 'mp4',
	65	'title': 'Jealousy Incarnate - Episode 1',
	66	'description': 'md5:d3d82375cab969415d2720b6894361e9',
	67	},
	68	'params': {
	69	'skip_download': 'm3u8 download',
	70	},
	71	'skip': 'Geo-restricted to Indonesia',
	72	}]
	73
	74	def _real_extract(self, url):
	75	video_id = self._match_id(url)
	76
72310315 RA	77	video_data = self._call_api(
	78	'clip/load', video_id, 'Downloading video data', query={
	79	'appid': 'viu_desktop',
	80	'fmt': 'json',
	81	'id': video_id
	82	})['item'][0]
	83
	84	title = video_data['title']
	85
	86	m3u8_url = None
	87	url_path = video_data.get('urlpathd') or video_data.get('urlpath')
	88	tdirforwhole = video_data.get('tdirforwhole')
	89	hls_file = video_data.get('hlsfile')
	90	if url_path and tdirforwhole and hls_file:
	91	m3u8_url = '%s/%s/%s' % (url_path, tdirforwhole, hls_file)
	92	else:
	93	m3u8_url = re.sub(
	94	r'(/hlsc_)[a-z]+(\d+\.m3u8)',
	95	r'\1whe\2', video_data['href'])
	96	formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
e7b6caef	97	self._sort_formats(formats)
	98
	99	subtitles = {}
72310315 RA	100	for key, value in video_data.items():
	101	mobj = re.match(r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
	102	if not mobj:
	103	continue
	104	subtitles.setdefault(mobj.group('lang'), []).append({
	105	'url': value,
	106	'ext': mobj.group('ext')
	107	})
e7b6caef	108
	109	return {
	110	'id': video_id,
	111	'title': title,
72310315 RA	112	'description': video_data.get('description'),
	113	'series': video_data.get('moviealbumshowname'),
	114	'episode': title,
	115	'episode_number': int_or_none(video_data.get('episodeno')),
	116	'duration': int_or_none(video_data.get('duration')),
e7b6caef	117	'formats': formats,
	118	'subtitles': subtitles,
	119	}
	120
	121
	122	class ViuPlaylistIE(ViuBaseIE):
	123	IE_NAME = 'viu:playlist'
72310315	124	_VALID_URL = r'https?://www\.viu\.com/[^/]+/listing/playlist-(?P<id>\d+)'
e7b6caef	125	_TEST = {
	126	'url': 'https://www.viu.com/en/listing/playlist-22461380',
	127	'info_dict': {
72310315	128	'id': '22461380',
e7b6caef	129	'title': 'The Good Wife',
	130	},
	131	'playlist_count': 16,
	132	'skip': 'Geo-restricted to Indonesia',
	133	}
	134
	135	def _real_extract(self, url):
	136	playlist_id = self._match_id(url)
72310315 RA	137	playlist_data = self._call_api(
	138	'container/load', playlist_id,
	139	'Downloading playlist info', query={
	140	'appid': 'viu_desktop',
	141	'fmt': 'json',
	142	'id': 'playlist-' + playlist_id
	143	})['container']
	144
	145	entries = []
	146	for item in playlist_data.get('item', []):
	147	item_id = item.get('id')
	148	if not item_id:
	149	continue
	150	item_id = compat_str(item_id)
	151	entries.append(self.url_result(
	152	'viu:' + item_id, 'Viu', item_id))
	153
	154	return self.playlist_result(
	155	entries, playlist_id, playlist_data.get('title'))
	156
	157
	158	class ViuOTTIE(InfoExtractor):
	159	IE_NAME = 'viu:ott'
	160	_VALID_URL = r'https?://(?:www\.)?viu\.com/ott/(?P<country_code>[a-z]{2})/[a-z]{2}-[a-z]{2}/vod/(?P<id>\d+)'
	161	_TESTS = [{
	162	'url': 'http://www.viu.com/ott/sg/en-us/vod/3421/The%20Prime%20Minister%20and%20I',
	163	'info_dict': {
	164	'id': '3421',
	165	'ext': 'mp4',
	166	'title': 'A New Beginning',
	167	'description': 'md5:1e7486a619b6399b25ba6a41c0fe5b2c',
	168	},
	169	'params': {
	170	'skip_download': 'm3u8 download',
	171	},
	172	'skip': 'Geo-restricted to Singapore',
	173	}, {
	174	'url': 'http://www.viu.com/ott/hk/zh-hk/vod/7123/%E5%A4%A7%E4%BA%BA%E5%A5%B3%E5%AD%90',
	175	'info_dict': {
	176	'id': '7123',
	177	'ext': 'mp4',
	178	'title': '這就是我的生活之道',
	179	'description': 'md5:4eb0d8b08cf04fcdc6bbbeb16043434f',
	180	},
	181	'params': {
	182	'skip_download': 'm3u8 download',
	183	},
	184	'skip': 'Geo-restricted to Hong Kong',
	185	}]
	186
	187	def _real_extract(self, url):
	188	country_code, video_id = re.match(self._VALID_URL, url).groups()
	189
	190	product_data = self._download_json(
	191	'http://www.viu.com/ott/%s/index.php' % country_code, video_id,
	192	'Downloading video info', query={
	193	'r': 'vod/ajax-detail',
	194	'platform_flag_label': 'web',
	195	'product_id': video_id,
	196	})['data']
	197
	198	video_data = product_data.get('current_product')
	199	if not video_data:
	200	raise ExtractorError('This video is not available in your region.', expected=True)
201
202	stream_data = self._download_json(
203	'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
204	video_id, 'Downloading stream info', query={
205	'ccs_product_id': video_data['ccs_product_id'],
206	})['data']['stream']
207
208	stream_sizes = stream_data.get('size', {})
209	formats = []
210	for vid_format, stream_url in stream_data.get('url', {}).items():
211	height = int_or_none(self._search_regex(
212	r's(\d+)p', vid_format, 'height', default=None))
213	formats.append({
214	'format_id': vid_format,
215	'url': stream_url,
216	'height': height,
217	'ext': 'mp4',
218	'filesize': int_or_none(stream_sizes.get(vid_format))
219	})
220	self._sort_formats(formats)
221
222	subtitles = {}
223	for sub in video_data.get('subtitle', []):
224	sub_url = sub.get('url')
225	if not sub_url:
226	continue
227	subtitles.setdefault(sub.get('name'), []).append({
228	'url': sub_url,
229	'ext': 'srt',
230	})
231
232	title = video_data['synopsis'].strip()
233
234	return {
235	'id': video_id,
236	'title': title,
237	'description': video_data.get('description'),
238	'series': product_data.get('series', {}).get('name'),
239	'episode': title,
240	'episode_number': int_or_none(video_data.get('number')),
241	'duration': int_or_none(stream_data.get('duration')),
242	'thumbnail': video_data.get('cover_image_url'),
243	'formats': formats,
244	'subtitles': subtitles,
245	}