[yt-dlp.git] / youtube_dl / extractor / theplatform.py

# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import re
import time
import hmac
import binascii
import hashlib


from .common import InfoExtractor
from ..compat import (
    compat_parse_qs,
    compat_urllib_parse_urlparse,
)
from ..utils import (
    determine_ext,
    ExtractorError,
    float_or_none,
    int_or_none,
    sanitized_Request,
    unsmuggle_url,
    url_basename,
    xpath_with_ns,
)

# XML namespace passed to the SMIL parsing helpers used in this module.
default_ns = 'http://www.w3.org/2005/SMIL21/Language'


def _x(p):
    """Return xpath_with_ns(p, ...) with the 'smil' prefix mapped to default_ns."""
    return xpath_with_ns(p, {'smil': default_ns})


class ThePlatformBaseIE(InfoExtractor):
    """Helpers shared by the thePlatform extractors in this module."""

    def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
        """Download the SMIL document at *smil_url* and parse it.

        Returns a ``(formats, subtitles)`` tuple; formats are already sorted.

        Raises ExtractorError (flagged as expected) when the document holds a
        ``ref`` element titled 'Geographic Restriction' or 'Expired'; that
        element's ``abstract`` attribute becomes the error message.
        """
        meta = self._download_xml(smil_url, video_id, note=note)
        try:
            error_msg = next(
                n.attrib['abstract']
                for n in meta.findall(_x('.//smil:ref'))
                if n.attrib.get('title') in ('Geographic Restriction', 'Expired'))
        except StopIteration:
            # No error marker in the SMIL: carry on with normal parsing.
            pass
        else:
            raise ExtractorError(error_msg, expected=True)

        formats = self._parse_smil_formats(
            meta, smil_url, video_id, namespace=default_ns,
            # the parameters are from syfy.com, other sites may use others,
            # they also work for nbc.com
            f4m_params={'g': 'UXWGVKRWHFSP', 'hdcore': '3.0.3'},
            transform_rtmp_url=lambda streamer, src: (streamer, 'mp4:' + src))

        for _format in formats:
            # URLs whose extension is detected as 'once' are reported as mp4.
            if determine_ext(_format['url']) == 'once':
                _format['ext'] = 'mp4'

        self._sort_formats(formats)

        subtitles = self._parse_smil_subtitles(meta, default_ns)

        return formats, subtitles

    def get_metadata(self, path, video_id):
        """Download the ``format=preview`` JSON for *path* and map it to info fields.

        Returns a dict with 'title', 'subtitles', 'description', 'thumbnail'
        and 'duration'. Only 'title' is required in the response; the other
        values are None (or an empty dict for 'subtitles') when absent.
        """
        info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
        info = self._download_json(info_url, video_id)

        subtitles = {}
        captions = info.get('captions')
        if isinstance(captions, list):
            for caption in captions:
                lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
                if not src:
                    # A caption without a URL cannot be downloaded; skip it
                    # instead of emitting a subtitle entry with url=None.
                    continue
                # Append instead of assigning so that several captions for the
                # same language do not overwrite each other.
                subtitles.setdefault(lang, []).append({
                    'ext': 'srt' if mime == 'text/srt' else 'ttml',
                    'url': src,
                })

        return {
            'title': info['title'],
            'subtitles': subtitles,
            # Optional fields: a missing key must not abort the extraction.
            'description': info.get('description'),
            'thumbnail': info.get('defaultThumbnailUrl'),
            # int_or_none is called with 1000 as its second argument
            # (presumably a scale converting milliseconds to seconds).
            'duration': int_or_none(info.get('duration'), 1000),
        }


class ThePlatformIE(ThePlatformBaseIE):
    """Extractor for link./player.theplatform.com URLs and ``theplatform:<id>`` references."""

    _VALID_URL = r'''(?x)
        (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
           (?:(?P<media>(?:[^/]+/)+select/media/)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
         |theplatform:)(?P<id>[^/\?&]+)'''

    _TESTS = [{
        # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
        'url': 'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
        'info_dict': {
            'id': 'e9I_cZgTgIPd',
            'ext': 'flv',
            'title': 'Blackberry\'s big, bold Z30',
            'description': 'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
            'duration': 247,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }, {
        # from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/
        'url': 'http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRT',
        'info_dict': {
            'id': '22d_qsQ6MIRT',
            'ext': 'flv',
            'description': 'md5:ac330c9258c04f9d7512cf26b9595409',
            'title': 'Tesla Model S: A second step towards a cleaner motoring future',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }, {
        'url': 'https://player.theplatform.com/p/D6x-PC/pulse_preview/embed/select/media/yMBg9E8KFxZD',
        'info_dict': {
            'id': 'yMBg9E8KFxZD',
            'ext': 'mp4',
            'description': 'md5:644ad9188d655b742f942bf2e06b002d',
            'title': 'HIGHLIGHTS: USA bag first ever series Cup win',
        }
    }, {
        'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7',
        'only_matching': True,
    }, {
        'url': 'http://player.theplatform.com/p/2E2eJC/nbcNewsOffsite?guid=tdy_or_siri_150701',
        'md5': '734f3790fb5fc4903da391beeebc4836',
        'info_dict': {
            'id': 'tdy_or_siri_150701',
            'ext': 'mp4',
            'title': 'iPhone Siri’s sassy response to a math question has people talking',
            'description': 'md5:a565d1deadd5086f3331d57298ec6333',
            'duration': 83.0,
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1435752600,
            'upload_date': '20150701',
            'categories': ['Today/Shows/Orange Room', 'Today/Sections/Money', 'Today/Topics/Tech', "Today/Topics/Editor's picks"],
        },
    }, {
        # From http://www.nbc.com/the-blacklist/video/sir-crispin-crandall/2928790?onid=137781#vc137781=1
        # geo-restricted (US), HLS encrypted with AES-128
        'url': 'http://player.theplatform.com/p/NnzsPC/onsite_universal/select/media/guid/2410887629/2928790?fwsitesection=nbc_the_blacklist_video_library&autoPlay=true&carouselID=137781',
        'only_matching': True,
    }]

    @staticmethod
    def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
        """Return *url* with a ``sig`` query parameter appended.

        The signature is ``flags + expiration + checksum + hex(sig_secret)``:
        * flags is '10' when *include_qs* is true, otherwise '00';
        * expiration is the current time plus *life* seconds, in hex;
        * checksum is the HMAC-SHA1 hex digest, keyed with *sig_key*, of the
          bytes decoded from ``flags + expiration + hex(relative path)``.

        The relative path is the part of *url* after
        ``link.theplatform.com/s/`` up to the query string; both http and
        https URLs are accepted. The parameter is appended with '&', so *url*
        is expected to already carry a query string.

        Raises ExtractorError if *url* is not a link.theplatform.com/s/ URL.
        """
        flags = '10' if include_qs else '00'
        expiration_date = '%x' % (int(time.time()) + life)

        def str_to_hex(text):
            return binascii.b2a_hex(text.encode('ascii')).decode('ascii')

        def hex_to_bytes(hex_digits):
            return binascii.a2b_hex(hex_digits)

        # Accept https as well as http: SMIL URLs scraped from player pages
        # are not guaranteed to use plain http.
        path_mobj = re.search(r'https?://link\.theplatform\.com/s/([^?]*)', url)
        if not path_mobj:
            raise ExtractorError('Unable to sign non link.theplatform.com URL: %s' % url)
        relative_path = path_mobj.group(1)

        clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path))
        checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
        sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
        return '%s&sig=%s' % (url, sig)

    def _real_extract(self, url):
        """Resolve *url* to a SMIL URL, then return the merged info dict.

        Smuggled data understood here: 'force_smil_url' (use *url* itself as
        the SMIL URL), 'source_url' (sent as Referer when fetching a '/guid/'
        player page) and 'sig' (a dict with 'key' and 'secret' used to sign
        the SMIL URL). URLs with a ``guid`` query parameter are delegated to
        the feed.theplatform.com URL built from the page's default feed id.
        """
        url, smuggled_data = unsmuggle_url(url, {})

        mobj = re.match(self._VALID_URL, url)
        provider_id = mobj.group('provider_id')
        video_id = mobj.group('id')

        if not provider_id:
            # The bare ``theplatform:<id>`` form captures no provider id;
            # fall back to this fixed one.
            provider_id = 'dJ5BDC'

        path = provider_id
        if mobj.group('media'):
            path += '/media'
        path += '/' + video_id

        qs_dict = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        if 'guid' in qs_dict:
            webpage = self._download_webpage(url, video_id)
            scripts = re.findall(r'<script[^>]+src="([^"]+)"', webpage)
            feed_id = None
            # The feed id is usually found in the last script. There seems to
            # be no pattern in the name of the relevant script, so each one is
            # tried in turn, starting from the end.
            for script in reversed(scripts):
                feed_script = self._download_webpage(
                    self._proto_relative_url(script, 'http:'),
                    video_id, 'Downloading feed script')
                feed_id = self._search_regex(
                    r'defaultFeedId\s*:\s*"([^"]+)"', feed_script,
                    'default feed id', default=None)
                if feed_id is not None:
                    break
            if feed_id is None:
                raise ExtractorError('Unable to find feed id')
            return self.url_result('http://feed.theplatform.com/f/%s/%s?byGuid=%s' % (
                provider_id, feed_id, qs_dict['guid'][0]))

        if smuggled_data.get('force_smil_url', False):
            smil_url = url
        # Explicitly specified SMIL (see https://github.com/rg3/youtube-dl/issues/7385)
        elif '/guid/' in url:
            headers = {}
            source_url = smuggled_data.get('source_url')
            if source_url:
                headers['Referer'] = source_url
            request = sanitized_Request(url, headers=headers)
            webpage = self._download_webpage(request, video_id)
            smil_url = self._search_regex(
                r'<link[^>]+href=(["\'])(?P<url>.+?)\1[^>]+type=["\']application/smil\+xml',
                webpage, 'smil url', group='url')
            path = self._search_regex(
                r'link\.theplatform\.com/s/((?:[^/?#&]+/)+[^/?#&]+)', smil_url, 'path')
            # Pick the separator first and append the parameters separately.
            # Written as one expression, the conditional binds looser than
            # '+', so a URL without '?' would receive only '?' and lose the
            # format parameters.
            smil_url += '?' if '?' not in smil_url else '&'
            smil_url += 'formats=m3u,mpeg4&format=SMIL'
        elif mobj.group('config'):
            config_url = url + '&form=json'
            config_url = config_url.replace('swf/', 'config/')
            config_url = config_url.replace('onsite/', 'onsite/config/')
            config = self._download_json(config_url, video_id, 'Downloading config')
            if 'releaseUrl' in config:
                release_url = config['releaseUrl']
            else:
                release_url = 'http://link.theplatform.com/s/%s?mbr=true' % path
            smil_url = release_url + '&format=SMIL&formats=MPEG4&manifest=f4m'
        else:
            smil_url = 'http://link.theplatform.com/s/%s/meta.smil?format=smil&mbr=true' % path

        sig = smuggled_data.get('sig')
        if sig:
            smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])

        formats, subtitles = self._extract_theplatform_smil(smil_url, video_id)

        ret = self.get_metadata(path, video_id)
        combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles)
        ret.update({
            'id': video_id,
            'formats': formats,
            'subtitles': combined_subtitles,
        })

        return ret


class ThePlatformFeedIE(ThePlatformBaseIE):
    """Extractor for feed.theplatform.com URLs that select an entry with ``byGuid``."""

    _URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&byGuid=%s'
    _VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*byGuid=(?P<id>[a-zA-Z0-9_]+)'
    _TEST = {
        # From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207
        'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207',
        'md5': '22d2b84f058d3586efcd99e57d59d314',
        'info_dict': {
            'id': 'n_hardball_5biden_140207',
            'ext': 'mp4',
            'title': 'The Biden factor: will Joe run in 2016?',
            'description': 'Could Vice President Joe Biden be preparing a 2016 campaign? Mark Halperin and Sam Stein weigh in.',
            'thumbnail': 're:^https?://.*\.jpg$',
            'upload_date': '20140208',
            'timestamp': 1391824260,
            'duration': 467.0,
            'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'],
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the first entry of the feed behind *url*.

        Each ``media$content`` item of that entry contributes formats and
        subtitles from its SMIL document. The basename of the first item's
        SMIL URL is the id handed to get_metadata(), and the first item also
        supplies the duration.
        """
        video_id, provider_id, feed_id = re.match(
            self._VALID_URL, url).group('id', 'provider_id', 'feed_id')

        feed_url = self._URL_TEMPLATE % (
            self.http_scheme(), provider_id, feed_id, video_id)
        entry = self._download_json(feed_url, video_id)['entries'][0]

        formats, subtitles = [], {}
        first_video_id = duration = None
        for media in entry['media$content']:
            smil_url = media['plfile$url'] + '&format=SMIL&Tracking=true&Embedded=true&formats=MPEG4,F4M'
            cur_video_id = url_basename(smil_url)
            if first_video_id is None:
                # Remember the first media item: it names the metadata path
                # and provides the duration.
                first_video_id = cur_video_id
                duration = float_or_none(media.get('plfile$duration'))
            note = 'Downloading SMIL data for %s' % cur_video_id
            smil_formats, smil_subtitles = self._extract_theplatform_smil(
                smil_url, video_id, note)
            formats += smil_formats
            subtitles = self._merge_subtitles(subtitles, smil_subtitles)

        self._sort_formats(formats)

        thumbnails = []
        for thumb in entry.get('media$thumbnails', []):
            thumbnails.append({
                'url': thumb['plfile$url'],
                'width': int_or_none(thumb.get('plfile$width')),
                'height': int_or_none(thumb.get('plfile$height')),
            })

        timestamp = int_or_none(entry.get('media$availableDate'), scale=1000)

        categories = []
        for category in entry.get('media$categories', []):
            categories.append(category['media$name'])

        metadata_path = '%s/%s' % (provider_id, first_video_id)
        ret = self.get_metadata(metadata_path, video_id)
        # SMIL subtitles are passed first, metadata captions second.
        ret['subtitles'] = self._merge_subtitles(subtitles, ret['subtitles'])
        ret['id'] = video_id
        ret['formats'] = formats
        ret['thumbnails'] = thumbnails
        ret['duration'] = duration
        ret['timestamp'] = timestamp
        ret['categories'] = categories

        return ret
Commit	Line	Data
aa6cd05e	1	# -- coding: utf-8 --
ed86f38a JMF	2	from __future__ import unicode_literals
ed86f38a JMF	3
e9bf7479	4	import re
9fb2f1cd S	5	import time
	6	import hmac
	7	import binascii
	8	import hashlib
	9
e9bf7479	10
8807f127	11	from .common import InfoExtractor
05fe2594 YCH	12	from ..compat import (
	13	compat_parse_qs,
	14	compat_urllib_parse_urlparse,
	15	)
1cc79574	16	from ..utils import (
10e3d734	17	determine_ext,
f8b56e95	18	ExtractorError,
18e4088f	19	float_or_none,
402a3efc	20	int_or_none,
18e4088f S	21	sanitized_Request,
18e4088f S	22	unsmuggle_url,
26e1c351	23	url_basename,
18e4088f	24	xpath_with_ns,
e9bf7479 JMF	25	)
e9bf7479 JMF	26
f877c6ae YCH	27	default_ns = 'http://www.w3.org/2005/SMIL21/Language'
f877c6ae YCH	28	_x = lambda p: xpath_with_ns(p, {'smil': default_ns})
e9bf7479 JMF	29
e9bf7479 JMF	30
26e1c351	31	class ThePlatformBaseIE(InfoExtractor):
c687ac74	32	def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
26e1c351 YCH	33	meta = self._download_xml(smil_url, video_id, note=note)
	34	try:
	35	error_msg = next(
	36	n.attrib['abstract']
	37	for n in meta.findall(_x('.//smil:ref'))
	38	if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired')
	39	except StopIteration:
	40	pass
	41	else:
	42	raise ExtractorError(error_msg, expected=True)
	43
	44	formats = self._parse_smil_formats(
	45	meta, smil_url, video_id, namespace=default_ns,
	46	# the parameters are from syfy.com, other sites may use others,
	47	# they also work for nbc.com
	48	f4m_params={'g': 'UXWGVKRWHFSP', 'hdcore': '3.0.3'},
	49	transform_rtmp_url=lambda streamer, src: (streamer, 'mp4:' + src))
	50
	51	for _format in formats:
	52	ext = determine_ext(_format['url'])
	53	if ext == 'once':
	54	_format['ext'] = 'mp4'
	55
	56	self._sort_formats(formats)
	57
c687ac74 YCH	58	subtitles = self._parse_smil_subtitles(meta, default_ns)
	59
	60	return formats, subtitles
26e1c351 YCH	61
	62	def get_metadata(self, path, video_id):
	63	info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
dd565ac1	64	info = self._download_json(info_url, video_id)
26e1c351 YCH	65
	66	subtitles = {}
	67	captions = info.get('captions')
	68	if isinstance(captions, list):
	69	for caption in captions:
	70	lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
	71	subtitles[lang] = [{
	72	'ext': 'srt' if mime == 'text/srt' else 'ttml',
	73	'url': src,
	74	}]
	75
	76	return {
	77	'title': info['title'],
	78	'subtitles': subtitles,
	79	'description': info['description'],
	80	'thumbnail': info['defaultThumbnailUrl'],
	81	'duration': int_or_none(info.get('duration'), 1000),
	82	}
	83
	84
	85	class ThePlatformIE(ThePlatformBaseIE):
a97bcd80	86	_VALID_URL = r'''(?x)
9fb2f1cd	87	(?:https?://(?:link\|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
bd5bc0cd	88	(?:(?P<media>(?:[^/]+/)+select/media/)\|(?P<config>(?:[^/\?]+/(?:swf\|config)\|onsite)/select/))?
a97bcd80	89	\|theplatform:)(?P<id>[^/\?&]+)'''
e9bf7479	90
bd7a6478	91	_TESTS = [{
e9bf7479	92	# from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
ed86f38a JMF	93	'url': 'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
	94	'info_dict': {
	95	'id': 'e9I_cZgTgIPd',
	96	'ext': 'flv',
	97	'title': 'Blackberry\'s big, bold Z30',
	98	'description': 'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
	99	'duration': 247,
e9bf7479	100	},
ed86f38a	101	'params': {
e9bf7479	102	# rtmp download
ed86f38a	103	'skip_download': True,
e9bf7479	104	},
bd7a6478	105	}, {
372f08c9	106	# from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/
bd7a6478 YCH	107	'url': 'http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRT',
	108	'info_dict': {
	109	'id': '22d_qsQ6MIRT',
	110	'ext': 'flv',
	111	'description': 'md5:ac330c9258c04f9d7512cf26b9595409',
	112	'title': 'Tesla Model S: A second step towards a cleaner motoring future',
	113	},
	114	'params': {
	115	# rtmp download
	116	'skip_download': True,
	117	}
6e054aac S	118	}, {
	119	'url': 'https://player.theplatform.com/p/D6x-PC/pulse_preview/embed/select/media/yMBg9E8KFxZD',
	120	'info_dict': {
	121	'id': 'yMBg9E8KFxZD',
	122	'ext': 'mp4',
	123	'description': 'md5:644ad9188d655b742f942bf2e06b002d',
	124	'title': 'HIGHLIGHTS: USA bag first ever series Cup win',
	125	}
	126	}, {
	127	'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7',
	128	'only_matching': True,
05fe2594 YCH	129	}, {
	130	'url': 'http://player.theplatform.com/p/2E2eJC/nbcNewsOffsite?guid=tdy_or_siri_150701',
	131	'md5': '734f3790fb5fc4903da391beeebc4836',
	132	'info_dict': {
	133	'id': 'tdy_or_siri_150701',
	134	'ext': 'mp4',
	135	'title': 'iPhone Siri’s sassy response to a math question has people talking',
	136	'description': 'md5:a565d1deadd5086f3331d57298ec6333',
	137	'duration': 83.0,
	138	'thumbnail': 're:^https?://.*\.jpg$',
	139	'timestamp': 1435752600,
	140	'upload_date': '20150701',
	141	'categories': ['Today/Shows/Orange Room', 'Today/Sections/Money', 'Today/Topics/Tech', "Today/Topics/Editor's picks"],
	142	},
9a4acbfa S	143	}, {
	144	# From http://www.nbc.com/the-blacklist/video/sir-crispin-crandall/2928790?onid=137781#vc137781=1
	145	# geo-restricted (US), HLS encrypted with AES-128
	146	'url': 'http://player.theplatform.com/p/NnzsPC/onsite_universal/select/media/guid/2410887629/2928790?fwsitesection=nbc_the_blacklist_video_library&autoPlay=true&carouselID=137781',
	147	'only_matching': True,
bd7a6478	148	}]
5f6a1245	149
9fb2f1cd S	150	@staticmethod
	151	def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
	152	flags = '10' if include_qs else '00'
	153	expiration_date = '%x' % (int(time.time()) + life)
	154
	155	def str_to_hex(str):
	156	return binascii.b2a_hex(str.encode('ascii')).decode('ascii')
	157
	158	def hex_to_str(hex):
	159	return binascii.a2b_hex(hex)
	160
	161	relative_path = url.split('http://link.theplatform.com/s/')[1].split('?')[0]
	162	clear_text = hex_to_str(flags + expiration_date + str_to_hex(relative_path))
	163	checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
	164	sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
	165	return '%s&sig=%s' % (url, sig)
	166
10e3d734	167	def _real_extract(self, url):
9fb2f1cd S	168	url, smuggled_data = unsmuggle_url(url, {})
9fb2f1cd S	169
10e3d734	170	mobj = re.match(self._VALID_URL, url)
9fb2f1cd	171	provider_id = mobj.group('provider_id')
10e3d734	172	video_id = mobj.group('id')
9fb2f1cd S	173
	174	if not provider_id:
	175	provider_id = 'dJ5BDC'
	176
6e054aac S	177	path = provider_id
	178	if mobj.group('media'):
	179	path += '/media'
	180	path += '/' + video_id
	181
05fe2594 YCH	182	qs_dict = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
	183	if 'guid' in qs_dict:
	184	webpage = self._download_webpage(url, video_id)
	185	scripts = re.findall(r'<script[^>]+src="([^"]+)"', webpage)
	186	feed_id = None
	187	# feed id usually locates in the last script.
	188	# Seems there's no pattern for the interested script filename, so
	189	# I try one by one
	190	for script in reversed(scripts):
ee5cd841	191	feed_script = self._download_webpage(
325bb615 S	192	self._proto_relative_url(script, 'http:'),
	193	video_id, 'Downloading feed script')
	194	feed_id = self._search_regex(
	195	r'defaultFeedId\s:\s"([^"]+)"', feed_script,
	196	'default feed id', default=None)
05fe2594 YCH	197	if feed_id is not None:
	198	break
	199	if feed_id is None:
	200	raise ExtractorError('Unable to find feed id')
	201	return self.url_result('http://feed.theplatform.com/f/%s/%s?byGuid=%s' % (
	202	provider_id, feed_id, qs_dict['guid'][0]))
	203
6140baf4 JMF	204	if smuggled_data.get('force_smil_url', False):
6140baf4 JMF	205	smil_url = url
ad1f4e79 S	206	# Explicitly specified SMIL (see https://github.com/rg3/youtube-dl/issues/7385)
ad1f4e79 S	207	elif '/guid/' in url:
18e4088f S	208	headers = {}
	209	source_url = smuggled_data.get('source_url')
	210	if source_url:
	211	headers['Referer'] = source_url
	212	request = sanitized_Request(url, headers=headers)
	213	webpage = self._download_webpage(request, video_id)
ad1f4e79 S	214	smil_url = self._search_regex(
	215	r'<link[^>]+href=(["\'])(?P<url>.+?)\1[^>]+type=["\']application/smil\+xml',
	216	webpage, 'smil url', group='url')
	217	path = self._search_regex(
	218	r'link\.theplatform\.com/s/((?:[^/?#&]+/)+[^/?#&]+)', smil_url, 'path')
	219	smil_url += '?' if '?' not in smil_url else '&' + 'formats=m3u,mpeg4&format=SMIL'
6140baf4	220	elif mobj.group('config'):
5f6a1245	221	config_url = url + '&form=json'
10e3d734 PH	222	config_url = config_url.replace('swf/', 'config/')
	223	config_url = config_url.replace('onsite/', 'onsite/config/')
	224	config = self._download_json(config_url, video_id, 'Downloading config')
28479149 YCH	225	if 'releaseUrl' in config:
	226	release_url = config['releaseUrl']
	227	else:
	228	release_url = 'http://link.theplatform.com/s/%s?mbr=true' % path
	229	smil_url = release_url + '&format=SMIL&formats=MPEG4&manifest=f4m'
10e3d734	230	else:
6e054aac	231	smil_url = 'http://link.theplatform.com/s/%s/meta.smil?format=smil&mbr=true' % path
9fb2f1cd S	232
	233	sig = smuggled_data.get('sig')
	234	if sig:
	235	smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])
e9bf7479	236
c687ac74	237	formats, subtitles = self._extract_theplatform_smil(smil_url, video_id)
f8b56e95	238
26e1c351	239	ret = self.get_metadata(path, video_id)
c687ac74	240	combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles)
26e1c351 YCH	241	ret.update({
	242	'id': video_id,
	243	'formats': formats,
c687ac74	244	'subtitles': combined_subtitles,
26e1c351	245	})
e9bf7479	246
26e1c351	247	return ret
748ec667	248
f877c6ae	249
26e1c351 YCH	250	class ThePlatformFeedIE(ThePlatformBaseIE):
	251	_URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&byGuid=%s'
	252	_VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*byGuid=(?P<id>[a-zA-Z0-9_]+)'
	253	_TEST = {
	254	# From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207
	255	'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207',
	256	'md5': '22d2b84f058d3586efcd99e57d59d314',
	257	'info_dict': {
	258	'id': 'n_hardball_5biden_140207',
	259	'ext': 'mp4',
	260	'title': 'The Biden factor: will Joe run in 2016?',
	261	'description': 'Could Vice President Joe Biden be preparing a 2016 campaign? Mark Halperin and Sam Stein weigh in.',
	262	'thumbnail': 're:^https?://.*\.jpg$',
	263	'upload_date': '20140208',
	264	'timestamp': 1391824260,
	265	'duration': 467.0,
	266	'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'],
	267	},
	268	}
	269
	270	def _real_extract(self, url):
	271	mobj = re.match(self._VALID_URL, url)
	272
	273	video_id = mobj.group('id')
	274	provider_id = mobj.group('provider_id')
	275	feed_id = mobj.group('feed_id')
	276
	277	real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, video_id)
	278	feed = self._download_json(real_url, video_id)
	279	entry = feed['entries'][0]
	280
	281	formats = []
c687ac74	282	subtitles = {}
26e1c351 YCH	283	first_video_id = None
	284	duration = None
	285	for item in entry['media$content']:
	286	smil_url = item['plfile$url'] + '&format=SMIL&Tracking=true&Embedded=true&formats=MPEG4,F4M'
	287	cur_video_id = url_basename(smil_url)
	288	if first_video_id is None:
	289	first_video_id = cur_video_id
	290	duration = float_or_none(item.get('plfile$duration'))
c687ac74 YCH	291	cur_formats, cur_subtitles = self._extract_theplatform_smil(smil_url, video_id, 'Downloading SMIL data for %s' % cur_video_id)
	292	formats.extend(cur_formats)
	293	subtitles = self._merge_subtitles(subtitles, cur_subtitles)
f877c6ae YCH	294
f877c6ae YCH	295	self._sort_formats(formats)
e9bf7479	296
26e1c351 YCH	297	thumbnails = [{
	298	'url': thumbnail['plfile$url'],
	299	'width': int_or_none(thumbnail.get('plfile$width')),
	300	'height': int_or_none(thumbnail.get('plfile$height')),
	301	} for thumbnail in entry.get('media$thumbnails', [])]
	302
	303	timestamp = int_or_none(entry.get('media$availableDate'), scale=1000)
	304	categories = [item['media$name'] for item in entry.get('media$categories', [])]
	305
	306	ret = self.get_metadata('%s/%s' % (provider_id, first_video_id), video_id)
c687ac74	307	subtitles = self._merge_subtitles(subtitles, ret['subtitles'])
26e1c351	308	ret.update({
e9bf7479	309	'id': video_id,
e9bf7479	310	'formats': formats,
c687ac74	311	'subtitles': subtitles,
26e1c351 YCH	312	'thumbnails': thumbnails,
	313	'duration': duration,
	314	'timestamp': timestamp,
	315	'categories': categories,
	316	})
	317
	318	return ret