[yt-dlp.git] / youtube_dl / extractor / ntv.py

# encoding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    RegexNotFoundError,
    unescapeHTML
)


class NTVIE(InfoExtractor):
    """Extractor for videos published on ntv.ru.

    The requested page is searched for a numeric video id; that id is then
    used to download an XML descriptor from which the metadata and the RTMP
    play paths of the available variants are read.
    """

    _VALID_URL = r'https?://(?:www\.)?ntv\.ru/(?P<id>.+)'

    _TESTS = [
        {
            'url': 'http://www.ntv.ru/novosti/863142/',
            'info_dict': {
                'id': '746000',
                'ext': 'flv',
                'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
                'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
                'duration': 136,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/video/novosti/750370/',
            'info_dict': {
                'id': '750370',
                'ext': 'flv',
                'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
                'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
                'duration': 172,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
            'info_dict': {
                'id': '747480',
                'ext': 'flv',
                'title': '«Сегодня». 21 марта 2014 года. 16:00 ',
                'description': '«Сегодня». 21 марта 2014 года. 16:00 ',
                'duration': 1496,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/kino/Koma_film',
            'info_dict': {
                'id': '750783',
                'ext': 'flv',
                # NOTE(review): '\x97' replaces an invalid '\97' escape that
                # stood here; presumably the served title carries a raw
                # U+0097 character -- confirm against the live metadata.
                'title': 'Остросюжетный фильм «Кома» \x97 4 апреля вечером на НТВ',
                'description': 'Остросюжетный фильм «Кома» \x97 4 апреля вечером на НТВ',
                'duration': 28,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
            'info_dict': {
                'id': '751482',
                'ext': 'flv',
                'title': '«Дело врачей»: «Деревце жизни»',
                'description': '«Дело врачей»: «Деревце жизни»',
                'duration': 2590,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
    ]

    # Patterns tried in order against the web page; the first one that
    # matches supplies the numeric video id in group 1.
    _VIDEO_ID_REGEXES = [
        r'<meta property="og:url" content="https?://www\.ntv\.ru/video/(\d+)',
        r'<video embed=[^>]+><id>(\d+)</id>',
        r'<video restriction[^>]+><key>(\d+)</key>'
    ]

    # Maps the <puid22> value of the video XML to the RTMP application name.
    _RTMP_APPS = {
        '4': 'video1',
        '7': 'video2',
    }
    # Application used when <puid22> is absent or holds an unknown value.
    _DEFAULT_RTMP_APP = _RTMP_APPS['4']

    @staticmethod
    def _int_or_none(text):
        """Return int(text), or None if text is None or not a valid integer."""
        try:
            return int(text)
        except (TypeError, ValueError):
            return None

    def _extract_video_id(self, page):
        """Return the first video id matched by _VIDEO_ID_REGEXES in page.

        Raises RegexNotFoundError when none of the patterns matches.
        """
        for pattern in self._VIDEO_ID_REGEXES:
            mobj = re.search(pattern, page)
            if mobj:
                return mobj.group(1)
        raise RegexNotFoundError('Unable to extract video id')

    def _real_extract(self, url):
        """Build the info dict (metadata plus RTMP formats) for url.

        Raises RegexNotFoundError if no video id can be found in the page.
        """
        mobj = re.match(self._VALID_URL, url)
        # The URL path is only a provisional id, used while fetching the page.
        page_id = mobj.group('id')

        page = self._download_webpage(url, page_id, 'Downloading page')
        video_id = self._extract_video_id(page)

        player = self._download_xml(
            'http://www.ntv.ru/vi%s/' % video_id, video_id,
            'Downloading video XML')
        title = unescapeHTML(player.find('./data/title').text)
        # Only unescape real text; a missing or empty tag yields None.
        description = player.findtext('./data/description')
        description = unescapeHTML(description) if description else None

        video = player.find('./data/video')
        # Prefer the id stated in the XML, falling back to the one found in
        # the page when the tag is missing or empty.
        video_id = video.findtext('./id') or video_id
        # The remaining fields are optional: a missing tag gives None
        # instead of aborting the extraction.
        thumbnail = video.findtext('./splash') or None
        duration = self._int_or_none(video.findtext('./totaltime'))
        view_count = self._int_or_none(video.findtext('./views'))

        app = self._RTMP_APPS.get(
            video.findtext('./puid22'), self._DEFAULT_RTMP_APP)

        formats = []
        # Each variant is described by a '<prefix>file' / '<prefix>size' pair.
        for prefix in ['', 'hi', 'webm']:
            play_path = video.findtext('./%sfile' % prefix)
            if not play_path:
                continue
            formats.append({
                'url': 'rtmp://media.ntv.ru/%s' % app,
                'app': app,
                'play_path': play_path,
                'rtmp_conn': 'B:1',
                'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
                'page_url': 'http://www.ntv.ru',
                'flash_ver': 'LNX 11,2,202,341',
                'rtmp_live': True,
                'ext': 'flv',
                # The size tag may be absent even when the file tag exists.
                'filesize': self._int_or_none(
                    video.findtext('./%ssize' % prefix)),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
        }
Commit	Line	Data
263f4b51 S	1	# encoding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import re
	5
	6	from .common import InfoExtractor
	7	from ..utils import (
	8	RegexNotFoundError,
	9	unescapeHTML
	10	)
	11
	12
	13	class NTVIE(InfoExtractor):
	14	_VALID_URL = r'http://(?:www\.)?ntv\.ru/(?P<id>.+)'
	15
	16	_TESTS = [
	17	{
	18	'url': 'http://www.ntv.ru/novosti/863142/',
	19	'info_dict': {
	20	'id': '746000',
	21	'ext': 'flv',
	22	'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
	23	'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
	24	'duration': 136,
	25	},
	26	'params': {
	27	# rtmp download
	28	'skip_download': True,
	29	},
	30	},
	31	{
	32	'url': 'http://www.ntv.ru/video/novosti/750370/',
	33	'info_dict': {
	34	'id': '750370',
	35	'ext': 'flv',
	36	'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
	37	'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
	38	'duration': 172,
	39	},
	40	'params': {
	41	# rtmp download
	42	'skip_download': True,
	43	},
	44	},
	45	{
	46	'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
	47	'info_dict': {
	48	'id': '747480',
	49	'ext': 'flv',
	50	'title': '«Сегодня». 21 марта 2014 года. 16:00 ',
	51	'description': '«Сегодня». 21 марта 2014 года. 16:00 ',
	52	'duration': 1496,
	53	},
	54	'params': {
	55	# rtmp download
	56	'skip_download': True,
	57	},
	58	},
	59	{
	60	'url': 'http://www.ntv.ru/kino/Koma_film',
	61	'info_dict': {
	62	'id': '750783',
	63	'ext': 'flv',
	64	'title': 'Остросюжетный фильм «Кома» \97 4 апреля вечером на НТВ',
65	'description': 'Остросюжетный фильм «Кома» \97 4 апреля вечером на НТВ',
66	'duration': 28,
67	},
68	'params': {
69	# rtmp download
70	'skip_download': True,
71	},
72	},
73	{
74	'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
75	'info_dict': {
76	'id': '751482',
77	'ext': 'flv',
78	'title': '«Дело врачей»: «Деревце жизни»',
79	'description': '«Дело врачей»: «Деревце жизни»',
80	'duration': 2590,
81	},
82	'params': {
83	# rtmp download
84	'skip_download': True,
85	},
86	},
87	]
88
89	_VIDEO_ID_REGEXES = [
90	r'<meta property="og:url" content="http://www\.ntv\.ru/video/(\d+)',
91	r'<video embed=[^>]+><id>(\d+)</id>',
92	r'<video restriction[^>]+><key>(\d+)</key>'
93	]
94
95	def _real_extract(self, url):
96	mobj = re.match(self._VALID_URL, url)
97	video_id = mobj.group('id')
98
99	page = self._download_webpage(url, video_id, 'Downloading page')
100
101	def extract(patterns, name, page, fatal=False):
102	for pattern in patterns:
103	mobj = re.search(pattern, page)
104	if mobj:
105	return mobj.group(1)
106	if fatal:
107	raise RegexNotFoundError(u'Unable to extract %s' % name)
108	return None
109
110	video_id = extract(self._VIDEO_ID_REGEXES, 'video id', page, fatal=True)
111
112	player = self._download_xml('http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML')
113	title = unescapeHTML(player.find('./data/title').text)
114	description = unescapeHTML(player.find('./data/description').text)
115
116	video = player.find('./data/video')
117	video_id = video.find('./id').text
118	thumbnail = video.find('./splash').text
119	duration = int(video.find('./totaltime').text)
120	view_count = int(video.find('./views').text)
121	puid22 = video.find('./puid22').text
122
123	apps = {
124	'4': 'video1',
125	'7': 'video2',
126	}
127
8f656244 S	128	app = apps[puid22] if puid22 in apps else apps['4']
8f656244 S	129
263f4b51 S	130	formats = []
	131	for format_id in ['', 'hi', 'webm']:
	132	file = video.find('./%sfile' % format_id)
	133	if file is None:
	134	continue
	135	size = video.find('./%ssize' % format_id)
263f4b51 S	136	formats.append({
	137	'url': 'rtmp://media.ntv.ru/%s' % app,
	138	'app': app,
	139	'play_path': file.text,
	140	'rtmp_conn': 'B:1',
	141	'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
	142	'page_url': 'http://www.ntv.ru',
	143	'flash_ver': 'LNX 11,2,202,341',
	144	'rtmp_live': True,
	145	'ext': 'flv',
	146	'filesize': int(size.text),
	147	})
	148	self._sort_formats(formats)
	149
	150	return {
	151	'id': video_id,
	152	'title': title,
	153	'description': description,
	154	'thumbnail': thumbnail,
	155	'duration': duration,
	156	'view_count': view_count,
	157	'formats': formats,
	158	}