[yt-dlp.git] / youtube_dl / extractor / iprima.py

# coding: utf-8
from __future__ import unicode_literals

import re
import time

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    js_to_json,
    sanitized_Request,
)


class IPrimaIE(InfoExtractor):
    """Extractor for video pages on play.iprima.cz.

    The page URL only carries a slug; the real product id is read from the
    page markup and then used to request the player initialisation page,
    from which the stream URLs are collected.
    """

    _VALID_URL = r'https?://play\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'

    _TESTS = [{
        'url': 'http://play.iprima.cz/gondici-s-r-o-33',
        'info_dict': {
            'id': 'p136534',
            'ext': 'mp4',
            'title': 'Gondíci s. r. o. (34)',
            'description': 'md5:16577c629d006aa91f59ca8d8e7f99bd',
        },
        'params': {
            'skip_download': True,  # m3u8 download
        },
    }, {
        'url': 'http://play.iprima.cz/particka/particka-92',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract formats and metadata for a single play.iprima.cz page.

        Args:
            url: Page URL matching ``_VALID_URL``.

        Returns:
            An info dict with ``id`` (the product id found in the page),
            ``title``, ``thumbnail``, ``description`` (all taken from the
            page's Open Graph data) and ``formats``.
        """
        # The URL slug is only used to label the first download; the id that
        # ends up in the result is the product id scraped from the page.
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id')

        # int(...) keeps the timestamp an integer on Python 2 as well, where
        # round() returns a float and would be rendered as e.g. "1450000000.0".
        req = sanitized_Request(
            'http://play.iprima.cz/prehravac/init?_infuse=1'
            '&_ts=%s&productId=%s' % (int(round(time.time())), video_id))
        req.add_header('Referer', url)
        playerpage = self._download_webpage(req, video_id, note='Downloading player')

        formats = []

        def extract_formats(format_url, format_key=None, lang=None):
            """Append the formats found at ``format_url`` to ``formats``.

            Only HLS sources (``format_key == 'hls'`` or an ``m3u8``
            extension) are extracted. DASH sources and anything else
            contribute nothing: the previous code returned before its MPD
            extraction call, and that behaviour is kept here.
            """
            ext = determine_ext(format_url)
            if format_key != 'hls' and ext != 'm3u8':
                return
            # NOTE(review): `or []` guards against a falsy result when the
            # non-fatal manifest download fails -- confirm against the helper.
            new_formats = self._extract_m3u8_formats(
                format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False) or []
            if lang:
                # Only fill in the language where the manifest gave none.
                for f in new_formats:
                    if not f.get('language'):
                        f['language'] = lang
            formats.extend(new_formats)

        options = self._parse_json(
            self._search_regex(
                r'(?s)var\s+playerOptions\s*=\s*({.+?});',
                playerpage, 'player options', default='{}'),
            video_id, transform_source=js_to_json, fatal=False)

        # Every level of the parsed structure is type-checked so that an
        # unexpected shape (e.g. "tracks": null) is skipped instead of raising.
        tracks_by_key = options.get('tracks') if isinstance(options, dict) else None
        if isinstance(tracks_by_key, dict):
            for key, tracks in tracks_by_key.items():
                if not isinstance(tracks, list):
                    continue
                for track in tracks:
                    if not isinstance(track, dict):
                        continue
                    src = track.get('src')
                    if src:
                        extract_formats(src, key.lower(), track.get('lang'))

        if not formats:
            # Fallback: scan the raw player page for any quoted `src: "..."`.
            for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage):
                extract_formats(src)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'formats': formats,
            'description': self._og_search_description(webpage),
        }
Commit	Line	Data
369e7e3f	1	# coding: utf-8
7881a644	2	from __future__ import unicode_literals
	3
	4	import re
f406c787	5	import time
7881a644	6
7881a644	7	from .common import InfoExtractor
1cc79574	8	from ..utils import (
369e7e3f S	9	determine_ext,
369e7e3f S	10	js_to_json,
5c2266df	11	sanitized_Request,
82642235	12	)
7881a644	13
	14
	15	class IPrimaIE(InfoExtractor):
f406c787	16	_VALID_URL = r'https?://play\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
7881a644	17
7881a644	18	_TESTS = [{
f406c787	19	'url': 'http://play.iprima.cz/gondici-s-r-o-33',
7881a644	20	'info_dict': {
f406c787	21	'id': 'p136534',
	22	'ext': 'mp4',
	23	'title': 'Gondíci s. r. o. (34)',
	24	'description': 'md5:16577c629d006aa91f59ca8d8e7f99bd',
7881a644	25	},
7881a644	26	'params': {
f406c787	27	'skip_download': True, # m3u8 download
7881a644	28	},
973f2532	29	}, {
f406c787	30	'url': 'http://play.iprima.cz/particka/particka-92',
bc03e585	31	'only_matching': True,
973f2532	32	}]
7881a644	33
7881a644	34	def _real_extract(self, url):
369e7e3f	35	video_id = self._match_id(url)
7881a644	36
	37	webpage = self._download_webpage(url, video_id)
	38
f406c787	39	video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id')
82642235	40
70328330 JMF	41	req = sanitized_Request(
70328330 JMF	42	'http://play.iprima.cz/prehravac/init?_infuse=1'
59b35c67	43	'&_ts=%s&productId=%s' % (round(time.time()), video_id))
7881a644	44	req.add_header('Referer', url)
f406c787	45	playerpage = self._download_webpage(req, video_id, note='Downloading player')
7881a644	46
369e7e3f	47	formats = []
7881a644	48
369e7e3f S	49	def extract_formats(format_url, format_key=None, lang=None):
	50	ext = determine_ext(format_url)
	51	new_formats = []
	52	if format_key == 'hls' or ext == 'm3u8':
	53	new_formats = self._extract_m3u8_formats(
	54	format_url, video_id, 'mp4', entry_protocol='m3u8_native',
	55	m3u8_id='hls', fatal=False)
	56	elif format_key == 'dash' or ext == 'mpd':
	57	return
	58	new_formats = self._extract_mpd_formats(
	59	format_url, video_id, mpd_id='dash', fatal=False)
	60	if lang:
	61	for f in new_formats:
	62	if not f.get('language'):
	63	f['language'] = lang
	64	formats.extend(new_formats)
	65
	66	options = self._parse_json(
	67	self._search_regex(
	68	r'(?s)var\s+playerOptions\s*=\s*({.+?});',
	69	playerpage, 'player options', default='{}'),
	70	video_id, transform_source=js_to_json, fatal=False)
	71	if options:
	72	for key, tracks in options.get('tracks', {}).items():
	73	if not isinstance(tracks, list):
	74	continue
	75	for track in tracks:
	76	src = track.get('src')
	77	if src:
	78	extract_formats(src, key.lower(), track.get('lang'))
	79
	80	if not formats:
	81	for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage):
	82	extract_formats(src)
91264ce5 PH	83
91264ce5 PH	84	self._sort_formats(formats)
7881a644	85
7881a644	86	return {
f406c787	87	'id': video_id,
f406c787	88	'title': self._og_search_title(webpage),
7881a644	89	'thumbnail': self._og_search_thumbnail(webpage),
7881a644	90	'formats': formats,
f406c787	91	'description': self._og_search_description(webpage),
91264ce5	92	}