[yt-dlp.git] / yt_dlp / extractor / morningstar.py

from .common import InfoExtractor


class MorningstarIE(InfoExtractor):
    """Extractor for video center pages on morningstar.com.

    All metadata is scraped from the page HTML with regular expressions:
    the title from the ``titleLink`` heading, the media URL and the
    thumbnail from hidden ``<input>`` elements, and the description from
    the ``mstarDeck`` div.
    """

    IE_DESC = 'morningstar.com'
    _VALID_URL = r'https?://(?:(?:www|news)\.)morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
    _TESTS = [
        {
            'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869',
            'md5': '6c0acface7a787aadc8391e4bbf7b0f5',
            'info_dict': {
                'id': '615869',
                'ext': 'mp4',
                'title': 'Get Ahead of the Curve on 2013 Taxes',
                'description': "Vanguard's Joel Dickson on managing higher tax rates for high-income earners and fund capital-gain distributions in 2013.",
                'thumbnail': r're:^https?://.*m(?:orning)?star\.com/.+thumb\.jpg$',
            },
        },
        {
            # Same page layout served from the "news" subdomain; URL matching only.
            'url': 'http://news.morningstar.com/cover/videocenter.aspx?id=825556',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict from its HTML.

        The title and video URL lookups use the default ``fatal`` setting,
        while the thumbnail and description lookups pass ``fatal=False``.
        """
        video_id = self._match_valid_url(url).group('id')
        page_html = self._download_webpage(url, video_id)

        # Dict displays are evaluated top to bottom, so the lookups below run
        # in the order written: title, video URL, thumbnail, description.
        return {
            'id': video_id,
            'title': self._html_search_regex(
                r'<h1 id="titleLink">(.*?)</h1>', page_html, 'title'),
            'url': self._html_search_regex(
                r'<input type="hidden" id="hidVideoUrl" value="([^"]+)"',
                page_html, 'video URL'),
            'thumbnail': self._html_search_regex(
                r'<input type="hidden" id="hidSnapshot" value="([^"]+)"',
                page_html, 'thumbnail', fatal=False),
            'description': self._html_search_regex(
                r'<div id="mstarDeck".*?>(.*?)</div>',
                page_html, 'description', fatal=False),
        }
Commit	Line	Data
2ad4d1ba	1	from .common import InfoExtractor
2ad4d1ba PH	2
	3
	4	class MorningstarIE(InfoExtractor):
	5	IE_DESC = 'morningstar.com'
790d379e W	6	_VALID_URL = r'https?://(?:(?:www|news)\.)morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
790d379e W	7	_TESTS = [{
2ad4d1ba PH	8	'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869',
	9	'md5': '6c0acface7a787aadc8391e4bbf7b0f5',
	10	'info_dict': {
	11	'id': '615869',
	12	'ext': 'mp4',
	13	'title': 'Get Ahead of the Curve on 2013 Taxes',
	14	'description': "Vanguard's Joel Dickson on managing higher tax rates for high-income earners and fund capital-gain distributions in 2013.",
	15	'thumbnail': r're:^https?://.*m(?:orning)?star\.com/.+thumb\.jpg$'
	16	}
790d379e W	17	}, {
	18	'url': 'http://news.morningstar.com/cover/videocenter.aspx?id=825556',
	19	'only_matching': True,
	20	}]
2ad4d1ba PH	21
2ad4d1ba PH	22	def _real_extract(self, url):
5ad28e7f	23	mobj = self._match_valid_url(url)
2ad4d1ba PH	24	video_id = mobj.group('id')
	25
	26	webpage = self._download_webpage(url, video_id)
	27	title = self._html_search_regex(
	28	r'<h1 id="titleLink">(.*?)</h1>', webpage, 'title')
	29	video_url = self._html_search_regex(
	30	r'<input type="hidden" id="hidVideoUrl" value="([^"]+)"',
	31	webpage, 'video URL')
	32	thumbnail = self._html_search_regex(
	33	r'<input type="hidden" id="hidSnapshot" value="([^"]+)"',
	34	webpage, 'thumbnail', fatal=False)
	35	description = self._html_search_regex(
	36	r'<div id="mstarDeck".*?>(.*?)</div>',
	37	webpage, 'description', fatal=False)
	38
	39	return {
	40	'id': video_id,
	41	'title': title,
	42	'url': video_url,
	43	'thumbnail': thumbnail,
	44	'description': description,
	45	}