[yt-dlp.git] / youtube_dl / extractor / pladform.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    xpath_text,
    qualities,
)


class PladformIE(InfoExtractor):
    """Extractor for videos served by pladform.ru.

    Handles the player URLs on out.pladform.ru and static.pladform.ru
    (which carry the id in a ``videoid`` query parameter) as well as
    the catalog pages on video.pladform.ru.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:
                                out\.pladform\.ru/player|
                                static\.pladform\.ru/player\.swf
                            )
                            \?.*\bvideoid=|
                            video\.pladform\.ru/catalog/video/videoid/
                        )
                        (?P<id>\d+)
                    '''
    _TESTS = [{
        # http://muz-tv.ru/kinozal/view/7400/
        'url': 'http://out.pladform.ru/player?pl=24822&videoid=100183293',
        'md5': '61f37b575dd27f1bb2e1854777fe31f4',
        'info_dict': {
            'id': '100183293',
            'ext': 'mp4',
            'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
            'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 694,
            'age_limit': 0,
        },
    }, {
        'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0',
        'only_matching': True,
    }, {
        'url': 'http://video.pladform.ru/catalog/video/videoid/100183293/vkcid/0',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_url(webpage):
        """Find an embedded Pladform player in a page.

        Returns the ``src`` of the first ``<iframe>`` in ``webpage`` that
        points at ``out.pladform.ru/player`` (with or without a scheme),
        or None when the page contains no such iframe.
        """
        mobj = re.search(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)\1', webpage)
        if mobj:
            return mobj.group('url')
        return None

    def _real_extract(self, url):
        """Build the info dict for the video referenced by ``url``.

        Formats, duration and age limit are read from the ``getVideo``
        XML document; title, description and thumbnail are read from the
        catalog web page, with the XML used as a fallback for title and
        thumbnail.

        Raises ExtractorError (expected) when the root element of the XML
        response is ``error``.
        """
        video_id = self._match_id(url)

        video = self._download_xml(
            'http://out.pladform.ru/getVideo?pl=1&videoid=%s' % video_id,
            video_id)

        # The service reports failures as an <error> root element whose
        # text is the message.
        if video.tag == 'error':
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, video.text),
                expected=True)

        # NOTE(review): presumably ranks the quality ids in ascending
        # order ('ld' lowest, 'hd' highest) -- confirm against utils.qualities.
        quality = qualities(('ld', 'sd', 'hd'))

        formats = []
        for src in video.findall('./src'):
            format_url = src.text
            # An empty <src/> element has no text; a format without a URL
            # is unusable, so do not emit it.
            if not format_url:
                continue
            format_id = src.get('quality')
            formats.append({
                'url': format_url,
                'format_id': format_id,
                'quality': quality(format_id),
            })
        self._sort_formats(formats)

        webpage = self._download_webpage(
            'http://video.pladform.ru/catalog/video/videoid/%s' % video_id,
            video_id)

        # Prefer the page's Open Graph title; the XML <title> is the
        # mandatory fallback (fatal=True).
        title = self._og_search_title(webpage, fatal=False) or xpath_text(
            video, './/title', 'title', fatal=True)
        description = self._search_regex(
            r'</h3>\s*<p>([^<]+)</p>', webpage, 'description', fatal=False)
        thumbnail = self._og_search_thumbnail(webpage) or xpath_text(
            video, './/cover', 'cover')

        duration = int_or_none(xpath_text(video, './/time', 'duration'))
        age_limit = int_or_none(xpath_text(video, './/age18', 'age limit'))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'age_limit': age_limit,
            'formats': formats,
        }
Commit	Line	Data
28778d6b S	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
db7c9da8 S	4	import re
db7c9da8 S	5
28778d6b S	6	from .common import InfoExtractor
	7	from ..utils import (
	8	ExtractorError,
	9	int_or_none,
	10	xpath_text,
11101076	11	qualities,
28778d6b S	12	)
	13
	14
	15	class PladformIE(InfoExtractor):
	16	_VALID_URL = r'''(?x)
	17	https?://
	18	(?:
	19	(?:
	20	out\.pladform\.ru/player\|
	21	static\.pladform\.ru/player\.swf
	22	)
	23	\?.*\bvideoid=\|
	24	video\.pladform\.ru/catalog/video/videoid/
	25	)
	26	(?P<id>\d+)
	27	'''
	28	_TESTS = [{
	29	# http://muz-tv.ru/kinozal/view/7400/
	30	'url': 'http://out.pladform.ru/player?pl=24822&videoid=100183293',
	31	'md5': '61f37b575dd27f1bb2e1854777fe31f4',
	32	'info_dict': {
	33	'id': '100183293',
	34	'ext': 'mp4',
5cb91cea	35	'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
28778d6b	36	'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
ec85ded8	37	'thumbnail': r're:^https?://.*\.jpg$',
28778d6b S	38	'duration': 694,
	39	'age_limit': 0,
	40	},
	41	}, {
	42	'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0',
	43	'only_matching': True,
	44	}, {
	45	'url': 'http://video.pladform.ru/catalog/video/videoid/100183293/vkcid/0',
	46	'only_matching': True,
	47	}]
	48
db7c9da8 S	49	@staticmethod
	50	def _extract_url(webpage):
	51	mobj = re.search(
8ff66978	52	r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)\1', webpage)
db7c9da8 S	53	if mobj:
	54	return mobj.group('url')
	55
28778d6b S	56	def _real_extract(self, url):
	57	video_id = self._match_id(url)
	58
	59	video = self._download_xml(
	60	'http://out.pladform.ru/getVideo?pl=1&videoid=%s' % video_id,
	61	video_id)
	62
	63	if video.tag == 'error':
	64	raise ExtractorError(
	65	'%s returned error: %s' % (self.IE_NAME, video.text),
	66	expected=True)
	67
11101076 S	68	quality = qualities(('ld', 'sd', 'hd'))
11101076 S	69
28778d6b S	70	formats = [{
	71	'url': src.text,
	72	'format_id': src.get('quality'),
11101076	73	'quality': quality(src.get('quality')),
28778d6b S	74	} for src in video.findall('./src')]
	75	self._sort_formats(formats)
	76
	77	webpage = self._download_webpage(
	78	'http://video.pladform.ru/catalog/video/videoid/%s' % video_id,
	79	video_id)
	80
	81	title = self._og_search_title(webpage, fatal=False) or xpath_text(
	82	video, './/title', 'title', fatal=True)
	83	description = self._search_regex(
	84	r'</h3>\s*<p>([^<]+)</p>', webpage, 'description', fatal=False)
	85	thumbnail = self._og_search_thumbnail(webpage) or xpath_text(
	86	video, './/cover', 'cover')
	87
	88	duration = int_or_none(xpath_text(video, './/time', 'duration'))
	89	age_limit = int_or_none(xpath_text(video, './/age18', 'age limit'))
	90
	91	return {
	92	'id': video_id,
	93	'title': title,
	94	'description': description,
	95	'thumbnail': thumbnail,
	96	'duration': duration,
	97	'age_limit': age_limit,
	98	'formats': formats,
	99	}