[yt-dlp.git] / youtube_dl / extractor / firstpost.py

from __future__ import unicode_literals

import re

from .common import InfoExtractor


class FirstpostIE(InfoExtractor):
    """Extractor for video article pages on firstpost.com.

    The numeric suffix of the article URL is used as the video id. The media
    URL is taken from the ``flashvars`` attribute of the page's
    ``div_video`` element, and the remaining metadata comes from the
    og-search helpers inherited from ``InfoExtractor``.
    """

    IE_NAME = 'Firstpost.com'
    # Accept both http:// and https:// links; the digits right before
    # ".html" are captured as the "id" group.
    _VALID_URL = r'https?://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'

    # Sample page and the values expected from extracting it.
    _TEST = {
        'url': 'http://www.firstpost.com/india/india-to-launch-indigenous-aircraft-carrier-monday-1025403.html',
        'md5': 'ee9114957692f01fb1263ed87039112a',
        'info_dict': {
            'id': '1025403',
            'ext': 'mp4',
            'title': 'India to launch indigenous aircraft carrier INS Vikrant today',
            'description': 'Its flight deck is over twice the size of a football field, its power unit can light up the entire Kochi city and the cabling is enough to cover the distance between here to Delhi.',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for a single Firstpost video page.

        ``url`` is matched against ``_VALID_URL`` and its ``id`` group becomes
        the video id. The page is downloaded once; the media URL and the
        title/description/thumbnail are all read from that same HTML.

        Returns a dict with the keys 'id', 'url', 'title', 'description'
        and 'thumbnail'.
        """
        # NOTE(review): assumes the caller only passes URLs that match
        # _VALID_URL; re.match() returns None otherwise and .group() would
        # fail -- confirm against the framework's dispatch logic.
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        # The value of the flashvars attribute on the "div_video" element is
        # used directly as the media URL.
        video_url = self._html_search_regex(
            r'<div.*?name="div_video".*?flashvars="([^"]+)">',
            webpage, 'video URL')

        return {
            'id': video_id,
            'url': video_url,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
        }
Commit	Line	Data
6cb38a99 PH	1	from __future__ import unicode_literals
	2
	3	import re
	4
	5	from .common import InfoExtractor
	6
	7
	8	class FirstpostIE(InfoExtractor):
	9	IE_NAME = 'Firstpost.com'
	10	_VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'
	11
	12	_TEST = {
	13	'url': 'http://www.firstpost.com/india/india-to-launch-indigenous-aircraft-carrier-monday-1025403.html',
	14	'md5': 'ee9114957692f01fb1263ed87039112a',
	15	'info_dict': {
	16	'id': '1025403',
	17	'ext': 'mp4',
	18	'title': 'India to launch indigenous aircraft carrier INS Vikrant today',
	19	'description': 'Its flight deck is over twice the size of a football field, its power unit can light up the entire Kochi city and the cabling is enough to cover the distance between here to Delhi.',
	20	}
	21	}
	22
	23	def _real_extract(self, url):
	24	mobj = re.match(self._VALID_URL, url)
	25	video_id = mobj.group('id')
	26
	27	webpage = self._download_webpage(url, video_id)
	28	video_url = self._html_search_regex(
	29	r'<div.*?name="div_video".*?flashvars="([^"]+)">',
	30	webpage, 'video URL')
	31
	32	return {
	33	'id': video_id,
	34	'url': video_url,
	35	'title': self._og_search_title(webpage),
	36	'description': self._og_search_description(webpage),
	37	'thumbnail': self._og_search_thumbnail(webpage),
	38	}