[yt-dlp.git] / youtube_dl / extractor / viewster.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import (
    compat_HTTPError,
    compat_urllib_parse_unquote,
)
from ..utils import (
    determine_ext,
    ExtractorError,
    int_or_none,
    parse_iso8601,
    sanitized_Request,
    HEADRequest,
    url_basename,
)


class ViewsterIE(InfoExtractor):
    """Information extractor for viewster.com movie, episode and series pages.

    A URL whose API entry has ``Type`` of ``'Serie'`` (or no ``Type`` at all)
    is returned as a playlist of its episodes; any other entry is returned as
    a single video with HDS, HLS and derived progressive HTTP formats.
    """

    _VALID_URL = r'https?://(?:www\.)?viewster\.com/(?:serie|movie)/(?P<id>\d+-\d+-\d+)'
    _TESTS = [{
        # movie, Type=Movie
        'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/',
        'md5': 'e642d1b27fcf3a4ffa79f194f5adde36',
        'info_dict': {
            'id': '1140-11855-000',
            'ext': 'mp4',
            'title': 'The listening Project',
            'description': 'md5:bac720244afd1a8ea279864e67baa071',
            'timestamp': 1214870400,
            'upload_date': '20080701',
            'duration': 4680,
        },
    }, {
        # series episode, Type=Episode
        'url': 'http://www.viewster.com/serie/1284-19427-001/the-world-and-a-wall/',
        'md5': '9243079a8531809efe1b089db102c069',
        'info_dict': {
            'id': '1284-19427-001',
            'ext': 'mp4',
            'title': 'The World and a Wall',
            'description': 'md5:24814cf74d3453fdf5bfef9716d073e3',
            'timestamp': 1428192000,
            'upload_date': '20150405',
            'duration': 1500,
        },
    }, {
        # serie, Type=Serie
        'url': 'http://www.viewster.com/serie/1303-19426-000/',
        'info_dict': {
            'id': '1303-19426-000',
            'title': 'Is It Wrong to Try to Pick up Girls in a Dungeon?',
            'description': 'md5:eeda9bef25b0d524b3a29a97804c2f11',
        },
        'playlist_count': 13,
    }, {
        # unfinished serie, no Type
        'url': 'http://www.viewster.com/serie/1284-19427-000/baby-steps-season-2/',
        'info_dict': {
            'id': '1284-19427-000',
            'title': 'Baby Steps—Season 2',
            'description': 'md5:e7097a8fc97151e25f085c9eb7a1cdb1',
        },
        'playlist_mincount': 16,
    }, {
        # geo restricted series
        'url': 'https://www.viewster.com/serie/1280-18794-002/',
        'only_matching': True,
    }, {
        # geo restricted video
        'url': 'https://www.viewster.com/serie/1280-18794-002/what-is-extraterritoriality-lawo/',
        'only_matching': True,
    }]

    _ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'

    # One or more ",<digits>k" items with an optional trailing comma,
    # e.g. ",500k,800k," inside a manifest basename
    _QUALITIES_RE = r'((,\d+k)+,?)'

    def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True, query=None):
        """Download JSON from the API with the Accept and Auth-token headers set.

        The auth token is read from ``self._AUTH_TOKEN``, which
        ``_real_extract`` assigns before issuing any API request.

        ``query`` defaults to None rather than a shared mutable dict; an empty
        dict is passed to the parent implementation in that case.
        """
        request = sanitized_Request(url)
        request.add_header('Accept', self._ACCEPT_HEADER)
        request.add_header('Auth-token', self._AUTH_TOKEN)
        return super(ViewsterIE, self)._download_json(
            request, video_id, note, fatal=fatal, query=query or {})

    def _extract_http_formats(self, video_url, manifest_url, m3u8_formats):
        """Build progressive HTTP formats from the bitrates named in a manifest URL.

        video_url     -- URL returned by the API for the 'video/mp4' media type
        manifest_url  -- last f4m/m3u8 manifest URL seen for the same language
                         set, or None when no manifest was obtained
        m3u8_formats  -- HLS formats extracted for the same language set
                         (may be empty); sorted in place when non-empty

        Returns a (possibly empty) list of format dicts.
        """
        # Without a manifest there is nothing to read the bitrates from;
        # passing None on to _search_regex would raise a TypeError.
        if not manifest_url:
            return []
        qualities_basename = self._search_regex(
            r'/([^/]+)\.csmil/',
            manifest_url, 'qualities basename', default=None)
        if not qualities_basename:
            return []
        qualities = self._search_regex(
            self._QUALITIES_RE, qualities_basename,
            'qualities', default=None)
        if not qualities:
            return []
        # ",500k,800k," -> [500, 800]
        qualities = sorted(
            int(quality[:-1]) for quality in qualities.strip(',').split(','))
        http_url_basename = url_basename(video_url)

        def http_url(quality):
            # Substitute the bitrate list directly instead of going through a
            # '%d' template, so a literal '%' in the basename cannot break
            # string formatting.
            return video_url.replace(
                http_url_basename,
                re.sub(self._QUALITIES_RE, '%dk' % quality, qualities_basename))

        if m3u8_formats:
            self._sort_formats(m3u8_formats)
            m3u8_formats = [
                f for f in m3u8_formats if f.get('vcodec') != 'none']

        http_formats = []
        if len(qualities) == len(m3u8_formats):
            # Same number of video variants: pair them up in sorted order and
            # reuse the HLS metadata for the HTTP formats
            for quality, m3u8_format in zip(qualities, m3u8_formats):
                f = m3u8_format.copy()
                f.update({
                    'url': http_url(quality),
                    'format_id': f['format_id'].replace('hls', 'http'),
                    'protocol': 'http',
                })
                http_formats.append(f)
        else:
            for quality in qualities:
                http_formats.append({
                    'url': http_url(quality),
                    'ext': 'mp4',
                    'format_id': 'http-%d' % quality,
                    'tbr': quality,
                })
        return http_formats

    def _real_extract(self, url):
        """Extract a playlist (series) or a single video info dict from url.

        Raises ExtractorError when the 'api_token' cookie is not set by the
        site, and reports geo restriction when the episodes request fails with
        HTTP 404 or when a video has neither formats nor 'VODSettings'.
        """
        video_id = self._match_id(url)
        # Get 'api_token' cookie
        self._request_webpage(
            HEADRequest('http://www.viewster.com/'),
            video_id, headers=self.geo_verification_headers())
        cookies = self._get_cookies('http://www.viewster.com/')
        if 'api_token' not in cookies:
            raise ExtractorError('Unable to obtain api_token cookie')
        self._AUTH_TOKEN = compat_urllib_parse_unquote(cookies['api_token'].value)

        info = self._download_json(
            'https://public-api.viewster.com/search/%s' % video_id,
            video_id, 'Downloading entry JSON')

        entry_id = info.get('Id') or info['id']
        # 'Synopsis' may be missing or null
        synopsis = info.get('Synopsis') or {}

        # unfinished serie has no Type
        if info.get('Type') in ('Serie', None):
            try:
                episodes = self._download_json(
                    'https://public-api.viewster.com/series/%s/episodes' % entry_id,
                    video_id, 'Downloading series JSON')
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
                    self.raise_geo_restricted()
                raise
            entries = [
                self.url_result(
                    'http://www.viewster.com/movie/%s' % episode['OriginId'], 'Viewster')
                for episode in episodes]
            title = (info.get('Title') or synopsis['Title']).strip()
            description = synopsis.get('Detailed')
            return self.playlist_result(entries, video_id, title, description)

        formats = []
        for language_set in info.get('LanguageSets') or []:
            manifest_url = None
            m3u8_formats = []
            audio = language_set.get('Audio') or ''
            subtitle = language_set.get('Subtitle') or ''
            base_format_id = audio
            if subtitle:
                base_format_id += '-%s' % subtitle

            def concat(suffix, sep='-'):
                return (base_format_id + '%s%s' % (sep, suffix)) if base_format_id else suffix

            # 'video/mp4' must come last: its formats are derived from the
            # manifest URL collected while handling the two manifest types
            for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'):
                media = self._download_json(
                    'https://public-api.viewster.com/movies/%s/video' % entry_id,
                    video_id, 'Downloading %s JSON' % concat(media_type, ' '), fatal=False, query={
                        'mediaType': media_type,
                        'language': audio,
                        'subtitle': subtitle,
                    })
                if not media:
                    continue
                video_url = media.get('Uri')
                if not video_url:
                    continue
                ext = determine_ext(video_url)
                if ext == 'f4m':
                    manifest_url = video_url
                    video_url += '&' if '?' in video_url else '?'
                    video_url += 'hdcore=3.2.0&plugin=flowplayer-3.2.0.1'
                    formats.extend(self._extract_f4m_formats(
                        video_url, video_id, f4m_id=concat('hds')))
                elif ext == 'm3u8':
                    manifest_url = video_url
                    m3u8_formats = self._extract_m3u8_formats(
                        video_url, video_id, 'mp4', m3u8_id=concat('hls'),
                        fatal=False)  # m3u8 sometimes fail
                    if m3u8_formats:
                        formats.extend(m3u8_formats)
                else:
                    formats.extend(self._extract_http_formats(
                        video_url, manifest_url, m3u8_formats))

        if not formats and not info.get('VODSettings'):
            self.raise_geo_restricted()

        self._sort_formats(formats)

        # Prefer title outside synopsis since it's less messy
        title = (info.get('Title') or synopsis['Title']).strip()
        description = synopsis.get('Detailed') or synopsis.get('Short')
        duration = int_or_none(info.get('Duration'))
        timestamp = parse_iso8601(info.get('ReleaseDate'))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'duration': duration,
            'formats': formats,
        }
Commit	Line	Data
b68a2613	1	# coding: utf-8
3647136f S	2	from __future__ import unicode_literals
3647136f S	3
864d5e72	4	import re
864d5e72	5
3647136f	6	from .common import InfoExtractor
b68a2613	7	from ..compat import (
cccedc1a	8	compat_HTTPError,
1f048735	9	compat_urllib_parse_unquote,
b68a2613 S	10	)
	11	from ..utils import (
	12	determine_ext,
cccedc1a	13	ExtractorError,
b68a2613 S	14	int_or_none,
b68a2613 S	15	parse_iso8601,
5c2266df	16	sanitized_Request,
30a45388	17	HEADRequest,
864d5e72	18	url_basename,
b68a2613	19	)
3647136f S	20
	21
	22	class ViewsterIE(InfoExtractor):
92085e70	23	_VALID_URL = r'https?://(?:www\.)?viewster\.com/(?:serie|movie)/(?P<id>\d+-\d+-\d+)'
7be5a62e	24	_TESTS = [{
b68a2613	25	# movie, Type=Movie
7be5a62e	26	'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/',
cb4e4219	27	'md5': 'e642d1b27fcf3a4ffa79f194f5adde36',
7be5a62e S	28	'info_dict': {
7be5a62e S	29	'id': '1140-11855-000',
cb4e4219	30	'ext': 'mp4',
b68a2613 S	31	'title': 'The listening Project',
	32	'description': 'md5:bac720244afd1a8ea279864e67baa071',
	33	'timestamp': 1214870400,
	34	'upload_date': '20080701',
	35	'duration': 4680,
	36	},
7be5a62e	37	}, {
b68a2613 S	38	# series episode, Type=Episode
b68a2613 S	39	'url': 'http://www.viewster.com/serie/1284-19427-001/the-world-and-a-wall/',
cb4e4219	40	'md5': '9243079a8531809efe1b089db102c069',
7be5a62e	41	'info_dict': {
b68a2613	42	'id': '1284-19427-001',
cb4e4219	43	'ext': 'mp4',
b68a2613 S	44	'title': 'The World and a Wall',
	45	'description': 'md5:24814cf74d3453fdf5bfef9716d073e3',
	46	'timestamp': 1428192000,
	47	'upload_date': '20150405',
	48	'duration': 1500,
	49	},
	50	}, {
	51	# serie, Type=Serie
	52	'url': 'http://www.viewster.com/serie/1303-19426-000/',
	53	'info_dict': {
	54	'id': '1303-19426-000',
	55	'title': 'Is It Wrong to Try to Pick up Girls in a Dungeon?',
	56	'description': 'md5:eeda9bef25b0d524b3a29a97804c2f11',
	57	},
	58	'playlist_count': 13,
	59	}, {
	60	# unfinished serie, no Type
	61	'url': 'http://www.viewster.com/serie/1284-19427-000/baby-steps-season-2/',
	62	'info_dict': {
	63	'id': '1284-19427-000',
	64	'title': 'Baby Steps—Season 2',
	65	'description': 'md5:e7097a8fc97151e25f085c9eb7a1cdb1',
	66	},
	67	'playlist_mincount': 16,
7ce50a35 S	68	}, {
	69	# geo restricted series
	70	'url': 'https://www.viewster.com/serie/1280-18794-002/',
	71	'only_matching': True,
	72	}, {
	73	# geo restricted video
	74	'url': 'https://www.viewster.com/serie/1280-18794-002/what-is-extraterritoriality-lawo/',
	75	'only_matching': True,
7be5a62e	76	}]
3647136f S	77
	78	_ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
	79
0ba9e3ca	80	def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True, query={}):
5c2266df	81	request = sanitized_Request(url)
3647136f	82	request.add_header('Accept', self._ACCEPT_HEADER)
b68a2613	83	request.add_header('Auth-token', self._AUTH_TOKEN)
0ba9e3ca	84	return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal, query=query)
3647136f	85
b68a2613 S	86	def _real_extract(self, url):
b68a2613 S	87	video_id = self._match_id(url)
799207e8	88	# Get 'api_token' cookie
31615ac2 RA	89	self._request_webpage(
	90	HEADRequest('http://www.viewster.com/'),
	91	video_id, headers=self.geo_verification_headers())
92085e70	92	cookies = self._get_cookies('http://www.viewster.com/')
1f048735	93	self._AUTH_TOKEN = compat_urllib_parse_unquote(cookies['api_token'].value)
7be5a62e	94
b68a2613 S	95	info = self._download_json(
	96	'https://public-api.viewster.com/search/%s' % video_id,
	97	video_id, 'Downloading entry JSON')
7be5a62e	98
b68a2613	99	entry_id = info.get('Id') or info['id']
7be5a62e	100
b68a2613	101	# unfinished serie has no Type
d0fed4ac	102	if info.get('Type') in ('Serie', None):
cccedc1a S	103	try:
	104	episodes = self._download_json(
	105	'https://public-api.viewster.com/series/%s/episodes' % entry_id,
	106	video_id, 'Downloading series JSON')
	107	except ExtractorError as e:
	108	if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
	109	self.raise_geo_restricted()
	110	else:
	111	raise
b68a2613 S	112	entries = [
	113	self.url_result(
	114	'http://www.viewster.com/movie/%s' % episode['OriginId'], 'Viewster')
	115	for episode in episodes]
c84683c8	116	title = (info.get('Title') or info['Synopsis']['Title']).strip()
b68a2613 S	117	description = info.get('Synopsis', {}).get('Detailed')
b68a2613 S	118	return self.playlist_result(entries, video_id, title, description)
7be5a62e	119
b68a2613	120	formats = []
0ba9e3ca	121	for language_set in info.get('LanguageSets', []):
	122	manifest_url = None
	123	m3u8_formats = []
	124	audio = language_set.get('Audio') or ''
	125	subtitle = language_set.get('Subtitle') or ''
	126	base_format_id = audio
	127	if subtitle:
	128	base_format_id += '-%s' % subtitle
	129
	130	def concat(suffix, sep='-'):
	131	return (base_format_id + '%s%s' % (sep, suffix)) if base_format_id else suffix
	132
	133	for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'):
	134	media = self._download_json(
	135	'https://public-api.viewster.com/movies/%s/video' % entry_id,
	136	video_id, 'Downloading %s JSON' % concat(media_type, ' '), fatal=False, query={
	137	'mediaType': media_type,
	138	'language': audio,
	139	'subtitle': subtitle,
	140	})
	141	if not media:
c14dc00d	142	continue
0ba9e3ca	143	video_url = media.get('Uri')
0ba9e3ca	144	if not video_url:
c14dc00d	145	continue
0ba9e3ca	146	ext = determine_ext(video_url)
	147	if ext == 'f4m':
	148	manifest_url = video_url
	149	video_url += '&' if '?' in video_url else '?'
	150	video_url += 'hdcore=3.2.0&plugin=flowplayer-3.2.0.1'
	151	formats.extend(self._extract_f4m_formats(
	152	video_url, video_id, f4m_id=concat('hds')))
	153	elif ext == 'm3u8':
	154	manifest_url = video_url
	155	m3u8_formats = self._extract_m3u8_formats(
	156	video_url, video_id, 'mp4', m3u8_id=concat('hls'),
	157	fatal=False) # m3u8 sometimes fail
	158	if m3u8_formats:
	159	formats.extend(m3u8_formats)
f1f87909	160	else:
0ba9e3ca	161	qualities_basename = self._search_regex(
ec85ded8	162	r'/([^/]+)\.csmil/',
0ba9e3ca	163	manifest_url, 'qualities basename', default=None)
	164	if not qualities_basename:
	165	continue
	166	QUALITIES_RE = r'((,\d+k)+,?)'
	167	qualities = self._search_regex(
	168	QUALITIES_RE, qualities_basename,
	169	'qualities', default=None)
	170	if not qualities:
	171	continue
	172	qualities = list(map(lambda q: int(q[:-1]), qualities.strip(',').split(',')))
	173	qualities.sort()
	174	http_template = re.sub(QUALITIES_RE, r'%dk', qualities_basename)
	175	http_url_basename = url_basename(video_url)
	176	if m3u8_formats:
	177	self._sort_formats(m3u8_formats)
	178	m3u8_formats = list(filter(
ff99fe52	179	lambda f: f.get('vcodec') != 'none', m3u8_formats))
0ba9e3ca	180	if len(qualities) == len(m3u8_formats):
	181	for q, m3u8_format in zip(qualities, m3u8_formats):
	182	f = m3u8_format.copy()
	183	f.update({
	184	'url': video_url.replace(http_url_basename, http_template % q),
	185	'format_id': f['format_id'].replace('hls', 'http'),
	186	'protocol': 'http',
	187	})
	188	formats.append(f)
	189	else:
	190	for q in qualities:
	191	formats.append({
	192	'url': video_url.replace(http_url_basename, http_template % q),
	193	'ext': 'mp4',
	194	'format_id': 'http-%d' % q,
	195	'tbr': q,
	196	})
	197
	198	if not formats and not info.get('VODSettings'):
9612f233 S	199	self.raise_geo_restricted()
9612f233 S	200
b68a2613	201	self._sort_formats(formats)
7be5a62e	202
485139c1	203	synopsis = info.get('Synopsis') or {}
b68a2613	204	# Prefer title outside synopsis since it's less messy
c84683c8	205	title = (info.get('Title') or synopsis['Title']).strip()
485139c1	206	description = synopsis.get('Detailed') or (info.get('Synopsis') or {}).get('Short')
b68a2613 S	207	duration = int_or_none(info.get('Duration'))
	208	timestamp = parse_iso8601(info.get('ReleaseDate'))
	209
	210	return {
	211	'id': video_id,
	212	'title': title,
	213	'description': description,
	214	'timestamp': timestamp,
	215	'duration': duration,
	216	'formats': formats,
	217	}