[yt-dlp.git] / youtube_dl / extractor / ntv.py

# encoding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    unescapeHTML
)


class NTVIE(InfoExtractor):
    """Extractor for videos published on ntv.ru.

    Extraction is a two-step process: the page at the given URL is searched
    for a numeric video id (see ``_VIDEO_ID_REGEXES``), then the player XML
    at ``http://www.ntv.ru/vi<id>/`` is downloaded and turned into a list of
    RTMP formats plus metadata.
    """

    _VALID_URL = r'http://(?:www\.)?ntv\.ru/(?P<id>.+)'

    _TESTS = [
        {
            'url': 'http://www.ntv.ru/novosti/863142/',
            'info_dict': {
                'id': '746000',
                'ext': 'flv',
                'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
                'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
                'duration': 136,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/video/novosti/750370/',
            'info_dict': {
                'id': '750370',
                'ext': 'flv',
                'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
                'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
                'duration': 172,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
            'info_dict': {
                'id': '747480',
                'ext': 'flv',
                'title': '«Сегодня». 21 марта 2014 года. 16:00 ',
                'description': '«Сегодня». 21 марта 2014 года. 16:00 ',
                'duration': 1496,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/kino/Koma_film',
            'info_dict': {
                'id': '758100',
                'ext': 'flv',
                'title': 'Остросюжетный фильм «Кома»',
                'description': 'Остросюжетный фильм «Кома»',
                'duration': 5592,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
            'info_dict': {
                'id': '751482',
                'ext': 'flv',
                'title': '«Дело врачей»: «Деревце жизни»',
                'description': '«Дело врачей»: «Деревце жизни»',
                'duration': 2590,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
    ]

    # Patterns tried in order against the downloaded page; the first one
    # that matches supplies the numeric video id in group 1.
    _VIDEO_ID_REGEXES = [
        r'<meta property="og:url" content="http://www\.ntv\.ru/video/(\d+)',
        r'<video embed=[^>]+><id>(\d+)</id>',
        r'<video restriction[^>]+><key>(\d+)</key>'
    ]

    # RTMP application name keyed by the <puid22> value of the player XML.
    _RTMP_APPS = {
        '4': 'video1',
        '7': 'video2',
    }
    # Application used when <puid22> is absent or has an unlisted value.
    _DEFAULT_RTMP_APP = 'video1'

    # Element-name prefixes of the per-format <...file>/<...size> pairs.
    _FORMAT_PREFIXES = ('', 'hi', 'webm')

    @staticmethod
    def _node_text(parent, path):
        """Return the text of the first element matching path, or None.

        None is returned both when no element matches and when the matched
        element has no text.
        """
        node = parent.find(path)
        if node is None:
            return None
        return node.text

    @staticmethod
    def _parse_int(value):
        """Return value converted to int, or None if missing or not numeric."""
        if value is None:
            return None
        try:
            return int(value)
        except ValueError:
            return None

    def _real_extract(self, url):
        """Build the info dict for the ntv.ru page at url.

        Raises ExtractorError when no video id can be found in the page, or
        when the player XML lacks a title or the ``./data/video`` element.
        Optional metadata (description, thumbnail, duration, view count,
        file size) is reported as None when the XML does not provide it.
        """
        display_id = re.match(self._VALID_URL, url).group('id')

        page = self._download_webpage(url, display_id, 'Downloading page')

        # Kept separate from the URL match so that a failed search can never
        # be mistaken for a successful one.
        id_match = None
        for pattern in self._VIDEO_ID_REGEXES:
            id_match = re.search(pattern, page)
            if id_match:
                break

        if not id_match:
            raise ExtractorError('No media links available for %s' % display_id)

        video_id = id_match.group(1)

        player = self._download_xml(
            'http://www.ntv.ru/vi%s/' % video_id, video_id,
            'Downloading video XML')

        title = self._node_text(player, './data/title')
        if not title:
            raise ExtractorError('Unable to extract title for %s' % video_id)
        title = unescapeHTML(title)

        description = self._node_text(player, './data/description')
        if description is not None:
            description = unescapeHTML(description)

        video = player.find('./data/video')
        if video is None:
            raise ExtractorError(
                'No video data in player XML for %s' % video_id)

        # Prefer the id reported by the player XML; keep the id scraped from
        # the page when the XML does not carry one.
        video_id = self._node_text(video, './id') or video_id
        thumbnail = self._node_text(video, './splash')
        duration = self._parse_int(self._node_text(video, './totaltime'))
        view_count = self._parse_int(self._node_text(video, './views'))

        app = self._RTMP_APPS.get(
            self._node_text(video, './puid22'), self._DEFAULT_RTMP_APP)

        formats = []
        for prefix in self._FORMAT_PREFIXES:
            play_path = self._node_text(video, './%sfile' % prefix)
            if not play_path:
                # This variant is not offered for the video.
                continue
            formats.append({
                'url': 'rtmp://media.ntv.ru/%s' % app,
                'app': app,
                'play_path': play_path,
                'rtmp_conn': 'B:1',
                'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
                'page_url': 'http://www.ntv.ru',
                'flash_ver': 'LNX 11,2,202,341',
                'rtmp_live': True,
                'ext': 'flv',
                # The size element is optional; a missing one must not
                # discard an otherwise usable format.
                'filesize': self._parse_int(
                    self._node_text(video, './%ssize' % prefix)),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
        }
Commit	Line	Data
263f4b51 S	1	# encoding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import re
	5
	6	from .common import InfoExtractor
	7	from ..utils import (
056b5668	8	ExtractorError,
263f4b51 S	9	unescapeHTML
	10	)
	11
	12
	13	class NTVIE(InfoExtractor):
	14	_VALID_URL = r'http://(?:www\.)?ntv\.ru/(?P<id>.+)'
	15
	16	_TESTS = [
	17	{
	18	'url': 'http://www.ntv.ru/novosti/863142/',
	19	'info_dict': {
	20	'id': '746000',
	21	'ext': 'flv',
	22	'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
	23	'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
	24	'duration': 136,
	25	},
	26	'params': {
	27	# rtmp download
	28	'skip_download': True,
	29	},
	30	},
	31	{
	32	'url': 'http://www.ntv.ru/video/novosti/750370/',
	33	'info_dict': {
	34	'id': '750370',
	35	'ext': 'flv',
	36	'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
	37	'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
	38	'duration': 172,
	39	},
	40	'params': {
	41	# rtmp download
	42	'skip_download': True,
	43	},
	44	},
	45	{
	46	'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
	47	'info_dict': {
	48	'id': '747480',
	49	'ext': 'flv',
	50	'title': '«Сегодня». 21 марта 2014 года. 16:00 ',
	51	'description': '«Сегодня». 21 марта 2014 года. 16:00 ',
	52	'duration': 1496,
	53	},
	54	'params': {
	55	# rtmp download
	56	'skip_download': True,
	57	},
	58	},
	59	{
	60	'url': 'http://www.ntv.ru/kino/Koma_film',
	61	'info_dict': {
c47d21da	62	'id': '758100',
263f4b51	63	'ext': 'flv',
c47d21da S	64	'title': 'Остросюжетный фильм «Кома»',
	65	'description': 'Остросюжетный фильм «Кома»',
	66	'duration': 5592,
263f4b51 S	67	},
	68	'params': {
	69	# rtmp download
	70	'skip_download': True,
	71	},
	72	},
	73	{
	74	'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
	75	'info_dict': {
	76	'id': '751482',
	77	'ext': 'flv',
	78	'title': '«Дело врачей»: «Деревце жизни»',
	79	'description': '«Дело врачей»: «Деревце жизни»',
	80	'duration': 2590,
	81	},
	82	'params': {
	83	# rtmp download
	84	'skip_download': True,
	85	},
	86	},
	87	]
	88
	89	_VIDEO_ID_REGEXES = [
	90	r'<meta property="og:url" content="http://www\.ntv\.ru/video/(\d+)',
	91	r'<video embed=[^>]+><id>(\d+)</id>',
	92	r'<video restriction[^>]+><key>(\d+)</key>'
	93	]
	94
	95	def _real_extract(self, url):
	96	mobj = re.match(self._VALID_URL, url)
	97	video_id = mobj.group('id')
	98
	99	page = self._download_webpage(url, video_id, 'Downloading page')
	100
056b5668 S	101	for pattern in self._VIDEO_ID_REGEXES:
	102	mobj = re.search(pattern, page)
	103	if mobj:
	104	break
263f4b51	105
056b5668 S	106	if not mobj:
	107	raise ExtractorError('No media links available for %s' % video_id)
	108
	109	video_id = mobj.group(1)
263f4b51 S	110
	111	player = self._download_xml('http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML')
	112	title = unescapeHTML(player.find('./data/title').text)
	113	description = unescapeHTML(player.find('./data/description').text)
	114
	115	video = player.find('./data/video')
	116	video_id = video.find('./id').text
	117	thumbnail = video.find('./splash').text
	118	duration = int(video.find('./totaltime').text)
	119	view_count = int(video.find('./views').text)
	120	puid22 = video.find('./puid22').text
	121
	122	apps = {
	123	'4': 'video1',
	124	'7': 'video2',
	125	}
	126
8f656244 S	127	app = apps[puid22] if puid22 in apps else apps['4']
8f656244 S	128
263f4b51 S	129	formats = []
	130	for format_id in ['', 'hi', 'webm']:
	131	file = video.find('./%sfile' % format_id)
	132	if file is None:
	133	continue
	134	size = video.find('./%ssize' % format_id)
263f4b51 S	135	formats.append({
	136	'url': 'rtmp://media.ntv.ru/%s' % app,
	137	'app': app,
	138	'play_path': file.text,
	139	'rtmp_conn': 'B:1',
	140	'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
	141	'page_url': 'http://www.ntv.ru',
	142	'flash_ver': 'LNX 11,2,202,341',
	143	'rtmp_live': True,
	144	'ext': 'flv',
	145	'filesize': int(size.text),
	146	})
	147	self._sort_formats(formats)
	148
	149	return {
	150	'id': video_id,
	151	'title': title,
	152	'description': description,
	153	'thumbnail': thumbnail,
	154	'duration': duration,
	155	'view_count': view_count,
	156	'formats': formats,
	157	}