[yt-dlp.git] / youtube_dl / extractor / eagleplatform.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
    ExtractorError,
    int_or_none,
    url_basename,
)


class EaglePlatformIE(InfoExtractor):
    """Extractor for videos played through Eagle Platform players.

    Two URL forms are accepted (see ``_VALID_URL``):

    * ``http(s)://<name>.media.eagleplatform.com/index/player?...record_id=<id>``
    * ``eagleplatform:<host>:<id>`` -- an internal form that carries the API
      host explicitly; ``_extract_url`` builds it for script-based embeds.
    """

    _VALID_URL = r'''(?x)
                    (?:
                        eagleplatform:(?P<custom_host>[^/]+):|
                        https?://(?P<host>.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id=
                    )
                    (?P<id>\d+)
                '''
    _TESTS = [{
        # http://lenta.ru/news/2015/03/06/navalny/
        'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
        # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
        'info_dict': {
            'id': '227304',
            'ext': 'mp4',
            'title': 'Навальный вышел на свободу',
            'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
            # Raw string: "\." is a regex escape, not a string escape
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 87,
            'view_count': int,
            'age_limit': 0,
        },
    }, {
        # http://muz-tv.ru/play/7129/
        # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
        'url': 'eagleplatform:media.clipyou.ru:12820',
        'md5': '358597369cf8ba56675c1df15e7af624',
        'info_dict': {
            'id': '12820',
            'ext': 'mp4',
            'title': "'O Sole Mio",
            # Raw string: "\." is a regex escape, not a string escape
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 216,
            'view_count': int,
        },
        'skip': 'Georestricted',
    }]

    @staticmethod
    def _extract_url(webpage):
        """Find an Eagle Platform embed in ``webpage``.

        Returns the iframe ``src`` URL for iframe embeds, an
        ``eagleplatform:<host>:<id>`` URL for script + ``div.eagleplayer``
        embeds, or ``None`` when neither pattern is present.
        """
        # Regular iframe embedding
        mobj = re.search(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1',
            webpage)
        if mobj is not None:
            return mobj.group('url')
        # Basic usage embedding (see http://dultonmedia.github.io/eplayer/)
        mobj = re.search(
            r'''(?xs)
                    <script[^>]+
                        src=(?P<q1>["\'])(?:https?:)?//(?P<host>.+?\.media\.eagleplatform\.com)/player/player\.js(?P=q1)
                    .+?
                    <div[^>]+
                        class=(?P<q2>["\'])eagleplayer(?P=q2)[^>]+
                        data-id=["\'](?P<id>\d+)
            ''', webpage)
        if mobj is not None:
            return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict()

    @staticmethod
    def _handle_error(response):
        """Raise an expected ``ExtractorError`` if ``response`` reports failure.

        ``response`` is a decoded JSON object.  A missing ``status`` key is
        treated as 200 (success); for any other status the entries of
        ``response['errors']`` are joined with spaces to form the message.
        """
        status = int_or_none(response.get('status', 200))
        if status != 200:
            raise ExtractorError(' '.join(response['errors']), expected=True)

    def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', *args, **kwargs):
        """Download JSON, surfacing API error messages carried by HTTP errors.

        Wraps the parent implementation.  When the download fails and the
        cause is an HTTP error, the error body is parsed as JSON and handed
        to ``_handle_error`` so that the API's own error text is reported.
        If that does not raise, the original exception is re-raised.

        Any extra positional or keyword arguments are forwarded unchanged to
        the parent ``_download_json`` so that this override does not narrow
        the inherited interface.
        """
        try:
            response = super(EaglePlatformIE, self)._download_json(
                url_or_request, video_id, note, *args, **kwargs)
        except ExtractorError as ee:
            if isinstance(ee.cause, compat_HTTPError):
                response = self._parse_json(ee.cause.read().decode('utf-8'), video_id)
                self._handle_error(response)
            raise
        return response

    def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'):
        """Return the first entry of the ``data`` list in the JSON response."""
        return self._download_json(url_or_request, video_id, note)['data'][0]

    def _real_extract(self, url):
        """Build the info dict (metadata plus HLS and HTTP formats) for ``url``."""
        mobj = re.match(self._VALID_URL, url)
        # Exactly one of the two host groups is set, depending on the URL form
        host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')

        player_data = self._download_json(
            'http://%s/api/player_data?id=%s' % (host, video_id), video_id)

        # Only the first media item of the first viewport is used
        media = player_data['data']['playlist']['viewports'][0]['medialist'][0]

        title = media['title']
        description = media.get('description')
        thumbnail = self._proto_relative_url(media.get('snapshot'), 'http:')
        duration = int_or_none(media.get('duration'))
        view_count = int_or_none(media.get('views'))

        # 'allow_all' maps to 0, any other non-empty value to 18;
        # a missing/empty value leaves the age limit unknown (None)
        age_restriction = media.get('age_restriction')
        age_limit = None
        if age_restriction:
            age_limit = 0 if age_restriction == 'allow_all' else 18

        secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:')

        formats = []

        # The "secure" URL returns JSON whose first data entry is the real
        # manifest URL
        m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
        m3u8_formats = self._extract_m3u8_formats(
            m3u8_url, video_id,
            'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
        formats.extend(m3u8_formats)

        mp4_url = self._get_video_url(
            # Secure mp4 URL is constructed according to Player.prototype.mp4 from
            # http://lentaru.media.eagleplatform.com/player/player.js
            re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4', secure_m3u8),
            video_id, 'Downloading mp4 JSON')
        mp4_url_basename = url_basename(mp4_url)
        # Derive one direct HTTP format per HLS variant by swapping the mp4
        # URL's basename for the path component preceding "index.m3u8"
        for m3u8_format in m3u8_formats:
            mobj = re.search(r'/([^/]+)/index\.m3u8', m3u8_format['url'])
            if mobj:
                http_format = m3u8_format.copy()
                video_url = mp4_url.replace(mp4_url_basename, mobj.group(1))
                # Skip variants whose derived URL fails the validity check
                if not self._is_valid_url(video_url, video_id):
                    continue
                http_format.update({
                    'url': video_url,
                    'format_id': m3u8_format['format_id'].replace('hls', 'http'),
                    'protocol': 'http',
                })
                formats.append(http_format)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'age_limit': age_limit,
            'formats': formats,
        }
Commit	Line	Data
0bf79ac4 S	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import re
	5
	6	from .common import InfoExtractor
ae655671	7	from ..compat import compat_HTTPError
0bf79ac4 S	8	from ..utils import (
	9	ExtractorError,
	10	int_or_none,
237a4110	11	url_basename,
0bf79ac4 S	12	)
	13
	14
	15	class EaglePlatformIE(InfoExtractor):
	16	_VALID_URL = r'''(?x)
	17	(?:
	18	eagleplatform:(?P<custom_host>[^/]+):|
	19	https?://(?P<host>.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id=
	20	)
	21	(?P<id>\d+)
	22	'''
	23	_TESTS = [{
	24	# http://lenta.ru/news/2015/03/06/navalny/
	25	'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
4645432d	26	# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
0bf79ac4 S	27	'info_dict': {
	28	'id': '227304',
	29	'ext': 'mp4',
	30	'title': 'Навальный вышел на свободу',
	31	'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
	32	'thumbnail': 're:^https?://.*\.jpg$',
	33	'duration': 87,
	34	'view_count': int,
	35	'age_limit': 0,
	36	},
	37	}, {
	38	# http://muz-tv.ru/play/7129/
	39	# http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
	40	'url': 'eagleplatform:media.clipyou.ru:12820',
237a4110	41	'md5': '358597369cf8ba56675c1df15e7af624',
0bf79ac4 S	42	'info_dict': {
	43	'id': '12820',
	44	'ext': 'mp4',
	45	'title': "'O Sole Mio",
	46	'thumbnail': 're:^https?://.*\.jpg$',
	47	'duration': 216,
	48	'view_count': int,
	49	},
f67dcc09	50	'skip': 'Georestricted',
0bf79ac4 S	51	}]
0bf79ac4 S	52
06a96da1 S	53	@staticmethod
06a96da1 S	54	def _extract_url(webpage):
3083e4dc	55	# Regular iframe embedding
06a96da1 S	56	mobj = re.search(
	57	r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1',
	58	webpage)
	59	if mobj is not None:
	60	return mobj.group('url')
3083e4dc S	61	# Basic usage embedding (see http://dultonmedia.github.io/eplayer/)
	62	mobj = re.search(
	63	r'''(?xs)
	64	<script[^>]+
	65	src=(?P<q1>["\'])(?:https?:)?//(?P<host>.+?\.media\.eagleplatform\.com)/player/player\.js(?P=q1)
	66	.+?
	67	<div[^>]+
	68	class=(?P<q2>["\'])eagleplayer(?P=q2)[^>]+
	69	data-id=["\'](?P<id>\d+)
	70	''', webpage)
	71	if mobj is not None:
	72	return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict()
06a96da1	73
3c63e1bb S	74	@staticmethod
3c63e1bb S	75	def _handle_error(response):
0bf79ac4 S	76	status = int_or_none(response.get('status', 200))
	77	if status != 200:
	78	raise ExtractorError(' '.join(response['errors']), expected=True)
	79
22becac4	80	def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
ae655671 YCH	81	try:
	82	response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
	83	except ExtractorError as ee:
	84	if isinstance(ee.cause, compat_HTTPError):
	85	response = self._parse_json(ee.cause.read().decode('utf-8'), video_id)
	86	self._handle_error(response)
	87	raise
22becac4	88	return response
	89
	90	def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'):
	91	return self._download_json(url_or_request, video_id, note)['data'][0]
0bf79ac4 S	92
	93	def _real_extract(self, url):
	94	mobj = re.match(self._VALID_URL, url)
	95	host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')
	96
	97	player_data = self._download_json(
	98	'http://%s/api/player_data?id=%s' % (host, video_id), video_id)
	99
	100	media = player_data['data']['playlist']['viewports'][0]['medialist'][0]
	101
	102	title = media['title']
	103	description = media.get('description')
2f962d0a	104	thumbnail = self._proto_relative_url(media.get('snapshot'), 'http:')
0bf79ac4 S	105	duration = int_or_none(media.get('duration'))
	106	view_count = int_or_none(media.get('views'))
	107
	108	age_restriction = media.get('age_restriction')
	109	age_limit = None
	110	if age_restriction:
	111	age_limit = 0 if age_restriction == 'allow_all' else 18
	112
d045f0bd	113	secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:')
0bf79ac4	114
237a4110	115	formats = []
237a4110	116
9d632b1b	117	m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
237a4110	118	m3u8_formats = self._extract_m3u8_formats(
9d632b1b	119	m3u8_url, video_id,
e36963e0	120	'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
237a4110	121	formats.extend(m3u8_formats)
9d632b1b	122
9d632b1b	123	mp4_url = self._get_video_url(
c471b345 S	124	# Secure mp4 URL is constructed according to Player.prototype.mp4 from
	125	# http://lentaru.media.eagleplatform.com/player/player.js
	126	re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4', secure_m3u8),
9d632b1b	127	video_id, 'Downloading mp4 JSON')
237a4110	128	mp4_url_basename = url_basename(mp4_url)
	129	for m3u8_format in m3u8_formats:
	130	mobj = re.search('/([^/]+)/index\.m3u8', m3u8_format['url'])
	131	if mobj:
	132	http_format = m3u8_format.copy()
4645432d YCH	133	video_url = mp4_url.replace(mp4_url_basename, mobj.group(1))
	134	if not self._is_valid_url(video_url, video_id):
	135	continue
237a4110	136	http_format.update({
4645432d	137	'url': video_url,
237a4110	138	'format_id': m3u8_format['format_id'].replace('hls', 'http'),
	139	'protocol': 'http',
	140	})
	141	formats.append(http_format)
9d632b1b	142
0bf79ac4 S	143	self._sort_formats(formats)
	144
	145	return {
	146	'id': video_id,
	147	'title': title,
	148	'description': description,
	149	'thumbnail': thumbnail,
	150	'duration': duration,
	151	'view_count': view_count,
	152	'age_limit': age_limit,
	153	'formats': formats,
	154	}