[yt-dlp.git] / youtube_dl / extractor / limelight.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    float_or_none,
    int_or_none,
)


class LimelightBaseIE(InfoExtractor):
    """Shared request and parsing helpers for the Limelight extractors.

    This class does not define ``_PLAYLIST_SERVICE_PATH`` or ``_API_PATH``;
    both are read from ``self`` when building request URLs, so subclasses
    that use the corresponding helpers must provide them.
    """
    _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
    _API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'

    def _call_playlist_service(self, item_id, method, fatal=True):
        """Download JSON from the PlaylistService endpoint.

        The URL is built from ``_PLAYLIST_SERVICE_PATH``, *item_id* and
        *method*; *fatal* is forwarded unchanged to ``_download_json``.
        """
        return self._download_json(
            self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
            item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal)

    def _call_api(self, organization_id, item_id, method):
        """Download JSON from the REST API endpoint.

        The URL is built from *organization_id*, ``_API_PATH``, *item_id*
        and *method*.
        """
        return self._download_json(
            self._API_URL % (organization_id, self._API_PATH, item_id, method),
            item_id, 'Downloading API %s JSON' % method)

    def _extract(self, item_id, pc_method, mobile_method, meta_method):
        """Fetch the three documents an extraction needs.

        Returns a ``(pc, mobile, metadata)`` tuple.  The metadata request
        needs the ``orgId`` found in the pc playlist, so the pc playlist is
        fetched first.  The mobile playlist is requested with
        ``fatal=False``, so callers must cope with a falsy *mobile*.
        """
        pc = self._call_playlist_service(item_id, pc_method)
        metadata = self._call_api(pc['orgId'], item_id, meta_method)
        mobile = self._call_playlist_service(item_id, mobile_method, fatal=False)
        return pc, mobile, metadata

    def _extract_info(self, streams, mobile_urls, properties):
        """Build an info dict for one media item.

        *streams* is an iterable of dicts read for ``url`` and the
        bitrate/size keys below; *mobile_urls* is an iterable of dicts read
        for ``mobileUrl`` and ``targetMediaPlatform``; *properties* is the
        metadata dict and must contain ``media_id`` and ``title``
        (a ``KeyError`` is raised otherwise).

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``formats``, ``timestamp``, ``duration``, ``filesize``,
        ``categories``, ``tags``, ``thumbnails`` and ``subtitles``.
        """
        video_id = properties['media_id']
        formats = []

        for stream in streams:
            stream_url = stream.get('url')
            if not stream_url:
                continue
            if '.f4m' in stream_url:
                formats.extend(self._extract_f4m_formats(stream_url, video_id))
            else:
                fmt = {
                    'url': stream_url,
                    'abr': float_or_none(stream.get('audioBitRate')),
                    'vbr': float_or_none(stream.get('videoBitRate')),
                    'fps': float_or_none(stream.get('videoFrameRate')),
                    'width': int_or_none(stream.get('videoWidthInPixels')),
                    'height': int_or_none(stream.get('videoHeightInPixels')),
                    'ext': determine_ext(stream_url)
                }
                # Split an rtmp(e) URL into the connection URL (including the
                # app), the app itself and the "mp4:..." play path.
                rtmp = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', stream_url)
                if rtmp:
                    format_id = 'rtmp'
                    if stream.get('videoBitRate'):
                        format_id += '-%d' % int_or_none(stream['videoBitRate'])
                    fmt.update({
                        'url': rtmp.group('url'),
                        'play_path': rtmp.group('playpath'),
                        'app': rtmp.group('app'),
                        'ext': 'flv',
                        'format_id': format_id,
                    })
                formats.append(fmt)

        # Every format coming from the mobile playlist is tagged with
        # preference -1.
        for mobile_url in mobile_urls:
            media_url = mobile_url.get('mobileUrl')
            if not media_url:
                continue
            format_id = mobile_url.get('targetMediaPlatform')
            if determine_ext(media_url) == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    media_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    preference=-1, m3u8_id=format_id))
            else:
                formats.append({
                    'url': media_url,
                    'format_id': format_id,
                    'preference': -1,
                })

        self._sort_formats(formats)

        title = properties['title']
        description = properties.get('description')
        timestamp = int_or_none(properties.get('publish_date') or properties.get('create_date'))
        # NOTE(review): the second argument is presumably a scale that turns
        # milliseconds into seconds - confirm against float_or_none.
        duration = float_or_none(properties.get('duration_in_milliseconds'), 1000)
        filesize = int_or_none(properties.get('total_storage_in_bytes'))
        # Do not report a bogus [None] category when the key is absent.
        category = properties.get('category')
        categories = [category] if category else []
        tags = properties.get('tags', [])
        # "or []" tolerates a key that is present but set to null.
        thumbnails = [{
            'url': thumbnail['url'],
            'width': int_or_none(thumbnail.get('width')),
            'height': int_or_none(thumbnail.get('height')),
        } for thumbnail in properties.get('thumbnails') or [] if thumbnail.get('url')]

        subtitles = {}
        for caption in properties.get('captions') or []:
            lang = caption.get('language_code')
            subtitles_url = caption.get('url')
            if lang and subtitles_url:
                # Accumulate instead of assigning so that several captions
                # sharing a language code are all kept.
                subtitles.setdefault(lang, []).append({
                    'url': subtitles_url,
                })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'timestamp': timestamp,
            'duration': duration,
            'filesize': filesize,
            'categories': categories,
            'tags': tags,
            'thumbnails': thumbnails,
            'subtitles': subtitles,
        }


class LimelightMediaIE(LimelightBaseIE):
    """Extractor for a single Limelight media item.

    Accepts either the internal ``limelight:media:<id>`` form or a
    link.videoplatform.limelight.com URL carrying a ``mediaId`` query
    parameter; the id is 32 lowercase alphanumeric characters.
    """
    IE_NAME = 'limelight'
    # Only the id is taken from the URL, so both schemes and any query
    # parameters preceding mediaId are accepted.
    _VALID_URL = r'(?:limelight:media:|https?://link\.videoplatform\.limelight\.com/media/(?:\?.*?)?\bmediaId=)(?P<id>[a-z0-9]{32})'
    _TEST = {
        'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86',
        'info_dict': {
            'id': '3ffd040b522b4485b6d84effc750cd86',
            'ext': 'flv',
            'title': 'HaP and the HB Prince Trailer',
            'description': 'As Harry Potter begins his 6th year at Hogwarts School of Witchcraft and Wizardry, he discovers an old book marked mysteriously "This book is the property of the Half-Blood Prince" and begins to learn more about Lord Voldemort\'s dark past.',
            # Raw string: "\." is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpeg$',
            'duration': 144.23,
            'timestamp': 1244136834,
            'upload_date': '20090604',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }
    _PLAYLIST_SERVICE_PATH = 'media'
    _API_PATH = 'media'

    def _real_extract(self, url):
        """Return the info dict for the media id found in *url*."""
        video_id = self._match_id(url)

        pc, mobile, metadata = self._extract(
            video_id, 'getPlaylistByMediaId', 'getMobilePlaylistByMediaId', 'properties')

        # The mobile playlist is requested non-fatally, so it is treated as
        # optional here too: a failed download, a missing "mediaList" or an
        # empty one all result in no mobile URLs instead of an exception.
        mobile_media = (mobile.get('mediaList') if mobile else None) or []
        mobile_urls = mobile_media[0].get('mobileUrls', []) if mobile_media else []

        return self._extract_info(
            pc['playlistItems'][0].get('streams', []),
            mobile_urls,
            metadata)


class LimelightChannelIE(LimelightBaseIE):
    """Extractor for a Limelight channel, returned as a playlist.

    Accepts either the internal ``limelight:channel:<id>`` form or a
    link.videoplatform.limelight.com URL carrying a ``channelId`` query
    parameter; the id is 32 lowercase alphanumeric characters.
    """
    IE_NAME = 'limelight:channel'
    # Only the id is taken from the URL, so both schemes and any query
    # parameters preceding channelId are accepted.
    _VALID_URL = r'(?:limelight:channel:|https?://link\.videoplatform\.limelight\.com/media/(?:\?.*?)?\bchannelId=)(?P<id>[a-z0-9]{32})'
    _TEST = {
        'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082',
        'info_dict': {
            'id': 'ab6a524c379342f9b23642917020c082',
            'title': 'Javascript Sample Code',
        },
        'playlist_mincount': 3,
    }
    _PLAYLIST_SERVICE_PATH = 'channel'
    _API_PATH = 'channels'

    def _real_extract(self, url):
        """Return a playlist result with one entry per item in the channel."""
        channel_id = self._match_id(url)

        pc, mobile, medias = self._extract(
            channel_id, 'getPlaylistByChannelId',
            'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1', 'media')

        playlist_items = pc['playlistItems']
        # The mobile playlist is requested non-fatally, so it may be falsy,
        # lack "mediaList", or be shorter than the metadata list.
        mobile_media = (mobile.get('mediaList') if mobile else None) or []

        # The three lists are paired up by position.
        entries = []
        for index, properties in enumerate(medias['media_list']):
            streams = playlist_items[index].get('streams', [])
            if index < len(mobile_media):
                mobile_urls = mobile_media[index].get('mobileUrls', [])
            else:
                mobile_urls = []
            entries.append(self._extract_info(streams, mobile_urls, properties))

        return self.playlist_result(entries, channel_id, pc['title'])


class LimelightChannelListIE(LimelightBaseIE):
    """Extractor for a Limelight channel list.

    Accepts either the internal ``limelight:channel_list:<id>`` form or a
    link.videoplatform.limelight.com URL carrying a ``channelListId`` query
    parameter.  Each channel in the list is delegated to the channel
    extractor through a ``limelight:channel:<id>`` URL.
    """
    IE_NAME = 'limelight:channel_list'
    # Only the id is taken from the URL, so both schemes are accepted.
    _VALID_URL = r'(?:limelight:channel_list:|https?://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelListId=)(?P<id>[a-z0-9]{32})'
    _TEST = {
        'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b',
        'info_dict': {
            'id': '301b117890c4465c8179ede21fd92e2b',
            'title': 'Website - Hero Player',
        },
        'playlist_mincount': 2,
    }
    _PLAYLIST_SERVICE_PATH = 'channel_list'

    def _real_extract(self, url):
        """Return a playlist result whose entries point at each channel."""
        channel_list_id = self._match_id(url)

        channel_list = self._call_playlist_service(channel_list_id, 'getMobileChannelListById')

        entries = []
        for channel in channel_list['channelList']:
            channel_url = 'limelight:channel:%s' % channel['id']
            entries.append(self.url_result(channel_url, 'LimelightChannel'))

        return self.playlist_result(entries, channel_list_id, channel_list['title'])
Commit	Line	Data
ef5acfe3	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import re
	5
	6	from .common import InfoExtractor
	7	from ..utils import (
ef5acfe3	8	determine_ext,
d7fc5631 S	9	float_or_none,
d7fc5631 S	10	int_or_none,
ef5acfe3	11	)
	12
	13
d7fc5631 S	14	class LimelightBaseIE(InfoExtractor):
	15	_PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
	16	_API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'
ef5acfe3	17
d7fc5631 S	18	def _call_playlist_service(self, item_id, method, fatal=True):
	19	return self._download_json(
	20	self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
	21	item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal)
ef5acfe3	22
d7fc5631 S	23	def _call_api(self, organization_id, item_id, method):
	24	return self._download_json(
	25	self._API_URL % (organization_id, self._API_PATH, item_id, method),
	26	item_id, 'Downloading API %s JSON' % method)
ef5acfe3	27
d7fc5631 S	28	def _extract(self, item_id, pc_method, mobile_method, meta_method):
	29	pc = self._call_playlist_service(item_id, pc_method)
	30	metadata = self._call_api(pc['orgId'], item_id, meta_method)
	31	mobile = self._call_playlist_service(item_id, mobile_method, fatal=False)
	32	return pc, mobile, metadata
	33
	34	def _extract_info(self, streams, mobile_urls, properties):
ef5acfe3	35	video_id = properties['media_id']
	36	formats = []
	37
ef5acfe3	38	for stream in streams:
d7fc5631 S	39	stream_url = stream.get('url')
	40	if not stream_url:
	41	continue
	42	if '.f4m' in stream_url:
	43	formats.extend(self._extract_f4m_formats(stream_url, video_id))
ef5acfe3	44	else:
ef5acfe3	45	fmt = {
d7fc5631 S	46	'url': stream_url,
	47	'abr': float_or_none(stream.get('audioBitRate')),
	48	'vbr': float_or_none(stream.get('videoBitRate')),
	49	'fps': float_or_none(stream.get('videoFrameRate')),
	50	'width': int_or_none(stream.get('videoWidthInPixels')),
	51	'height': int_or_none(stream.get('videoHeightInPixels')),
	52	'ext': determine_ext(stream_url)
ef5acfe3	53	}
d7fc5631	54	rtmp = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', stream_url)
ef5acfe3	55	if rtmp:
d7fc5631 S	56	format_id = 'rtmp'
	57	if stream.get('videoBitRate'):
	58	format_id += '-%d' % int_or_none(stream['videoBitRate'])
ef5acfe3	59	fmt.update({
	60	'url': rtmp.group('url'),
	61	'play_path': rtmp.group('playpath'),
	62	'app': rtmp.group('app'),
d7fc5631 S	63	'ext': 'flv',
d7fc5631 S	64	'format_id': format_id,
ef5acfe3	65	})
	66	formats.append(fmt)
	67
d7fc5631 S	68	for mobile_url in mobile_urls:
	69	media_url = mobile_url.get('mobileUrl')
	70	if not media_url:
	71	continue
	72	format_id = mobile_url.get('targetMediaPlatform')
	73	if determine_ext(media_url) == 'm3u8':
	74	formats.extend(self._extract_m3u8_formats(
	75	media_url, video_id, 'mp4', entry_protocol='m3u8_native',
	76	preference=-1, m3u8_id=format_id))
	77	else:
	78	formats.append({
	79	'url': media_url,
	80	'format_id': format_id,
	81	'preference': -1,
	82	})
	83
ef5acfe3	84	self._sort_formats(formats)
	85
	86	title = properties['title']
	87	description = properties.get('description')
d7fc5631 S	88	timestamp = int_or_none(properties.get('publish_date') or properties.get('create_date'))
	89	duration = float_or_none(properties.get('duration_in_milliseconds'), 1000)
	90	filesize = int_or_none(properties.get('total_storage_in_bytes'))
ef5acfe3	91	categories = [properties.get('category')]
d7fc5631	92	tags = properties.get('tags', [])
ef5acfe3	93	thumbnails = [{
d7fc5631	94	'url': thumbnail['url'],
ef5acfe3	95	'width': int_or_none(thumbnail.get('width')),
ef5acfe3	96	'height': int_or_none(thumbnail.get('height')),
d7fc5631 S	97	} for thumbnail in properties.get('thumbnails', []) if thumbnail.get('url')]
	98
	99	subtitles = {}
	100	for caption in properties.get('captions', {}):
	101	lang = caption.get('language_code')
	102	subtitles_url = caption.get('url')
	103	if lang and subtitles_url:
	104	subtitles[lang] = [{
	105	'url': subtitles_url,
	106	}]
ef5acfe3	107
	108	return {
	109	'id': video_id,
	110	'title': title,
	111	'description': description,
	112	'formats': formats,
	113	'timestamp': timestamp,
	114	'duration': duration,
	115	'filesize': filesize,
	116	'categories': categories,
d7fc5631	117	'tags': tags,
ef5acfe3	118	'thumbnails': thumbnails,
	119	'subtitles': subtitles,
	120	}
	121
	122
d7fc5631	123	class LimelightMediaIE(LimelightBaseIE):
ef5acfe3	124	IE_NAME = 'limelight'
d7fc5631	125	_VALID_URL = r'(?:limelight:media:\|http://link\.videoplatform\.limelight\.com/media/\??\bmediaId=)(?P<id>[a-z0-9]{32})'
ef5acfe3	126	_TEST = {
ef5acfe3	127	'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86',
ef5acfe3	128	'info_dict': {
ef5acfe3	129	'id': '3ffd040b522b4485b6d84effc750cd86',
d7fc5631	130	'ext': 'flv',
ef5acfe3	131	'title': 'HaP and the HB Prince Trailer',
	132	'description': 'As Harry Potter begins his 6th year at Hogwarts School of Witchcraft and Wizardry, he discovers an old book marked mysteriously "This book is the property of the Half-Blood Prince" and begins to learn more about Lord Voldemort\'s dark past.',
	133	'thumbnail': 're:^https?://.*\.jpeg$',
d7fc5631	134	'duration': 144.23,
ef5acfe3	135	'timestamp': 1244136834,
d7fc5631 S	136	'upload_date': '20090604',
	137	},
	138	'params': {
	139	# rtmp download
	140	'skip_download': True,
	141	},
ef5acfe3	142	}
d7fc5631 S	143	_PLAYLIST_SERVICE_PATH = 'media'
d7fc5631 S	144	_API_PATH = 'media'
ef5acfe3	145
	146	def _real_extract(self, url):
	147	video_id = self._match_id(url)
	148
d7fc5631 S	149	pc, mobile, metadata = self._extract(
d7fc5631 S	150	video_id, 'getPlaylistByMediaId', 'getMobilePlaylistByMediaId', 'properties')
ef5acfe3	151
d7fc5631 S	152	return self._extract_info(
	153	pc['playlistItems'][0].get('streams', []),
	154	mobile['mediaList'][0].get('mobileUrls', []) if mobile else [],
	155	metadata)
ef5acfe3	156
ef5acfe3	157
d7fc5631	158	class LimelightChannelIE(LimelightBaseIE):
ef5acfe3	159	IE_NAME = 'limelight:channel'
d7fc5631	160	_VALID_URL = r'(?:limelight:channel:\|http://link\.videoplatform\.limelight\.com/media/\??\bchannelId=)(?P<id>[a-z0-9]{32})'
ef5acfe3	161	_TEST = {
	162	'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082',
	163	'info_dict': {
	164	'id': 'ab6a524c379342f9b23642917020c082',
	165	'title': 'Javascript Sample Code',
	166	},
	167	'playlist_mincount': 3,
	168	}
d7fc5631 S	169	_PLAYLIST_SERVICE_PATH = 'channel'
d7fc5631 S	170	_API_PATH = 'channels'
ef5acfe3	171
	172	def _real_extract(self, url):
	173	channel_id = self._match_id(url)
	174
d7fc5631 S	175	pc, mobile, medias = self._extract(
	176	channel_id, 'getPlaylistByChannelId',
	177	'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1', 'media')
ef5acfe3	178
d7fc5631 S	179	entries = [
	180	self._extract_info(
	181	pc['playlistItems'][i].get('streams', []),
	182	mobile['mediaList'][i].get('mobileUrls', []) if mobile else [],
	183	medias['media_list'][i])
	184	for i in range(len(medias['media_list']))]
ef5acfe3	185
d7fc5631	186	return self.playlist_result(entries, channel_id, pc['title'])
ef5acfe3	187
ef5acfe3	188
d7fc5631	189	class LimelightChannelListIE(LimelightBaseIE):
ef5acfe3	190	IE_NAME = 'limelight:channel_list'
d7fc5631	191	_VALID_URL = r'(?:limelight:channel_list:\|http://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelListId=)(?P<id>[a-z0-9]{32})'
ef5acfe3	192	_TEST = {
	193	'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b',
	194	'info_dict': {
	195	'id': '301b117890c4465c8179ede21fd92e2b',
	196	'title': 'Website - Hero Player',
	197	},
	198	'playlist_mincount': 2,
	199	}
d7fc5631	200	_PLAYLIST_SERVICE_PATH = 'channel_list'
ef5acfe3	201
	202	def _real_extract(self, url):
	203	channel_list_id = self._match_id(url)
	204
d7fc5631	205	channel_list = self._call_playlist_service(channel_list_id, 'getMobileChannelListById')
ef5acfe3	206
d7fc5631 S	207	entries = [
	208	self.url_result('limelight:channel:%s' % channel['id'], 'LimelightChannel')
	209	for channel in channel_list['channelList']]
ef5acfe3	210
d7fc5631	211	return self.playlist_result(entries, channel_list_id, channel_list['title'])