jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import re
	5
	6	from .common import InfoExtractor
	7	from ..compat import compat_str
	8	from ..utils import (
	9	format_field,
	10	int_or_none,
	11	js_to_json,
	12	try_get,
	13	)
	14
	15
	16	class JojIE(InfoExtractor):
	17	_VALID_URL = r'''(?x)
	18	(?:
	19	joj:\|
	20	https?://media\.joj\.sk/embed/
	21	)
	22	(?P<id>[^/?#^]+)
	23	'''
	24	_TESTS = [{
	25	'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
	26	'info_dict': {
	27	'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932',
	28	'ext': 'mp4',
	29	'title': 'NOVÉ BÝVANIE',
	30	'thumbnail': r're:^https?://.*\.jpg$',
	31	'duration': 3118,
	32	}
	33	}, {
	34	'url': 'https://media.joj.sk/embed/9i1cxv',
	35	'only_matching': True,
	36	}, {
	37	'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932',
	38	'only_matching': True,
	39	}, {
	40	'url': 'joj:9i1cxv',
	41	'only_matching': True,
	42	}]
	43
	44	@staticmethod
	45	def _extract_urls(webpage):
	46	return [
	47	mobj.group('url')
	48	for mobj in re.finditer(
	49	r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//media\.joj\.sk/embed/(?:(?!\1).)+)\1',
	50	webpage)]
	51
	52	def _real_extract(self, url):
	53	video_id = self._match_id(url)
	54
	55	webpage = self._download_webpage(
	56	'https://media.joj.sk/embed/%s' % video_id, video_id)
	57
	58	title = self._search_regex(
	59	(r'videoTitle\s:\s(["\'])(?P<title>(?:(?!\1).)+)\1',
	60	r'<title>(?P<title>[^<]+)'), webpage, 'title',
	61	default=None, group='title') or self._og_search_title(webpage)
	62
	63	bitrates = self._parse_json(
	64	self._search_regex(
	65	r'(?s)(?:src\|bitrates)\s=\s({.+?});', webpage, 'bitrates',
	66	default='{}'),
	67	video_id, transform_source=js_to_json, fatal=False)
	68
	69	formats = []
	70	for format_url in try_get(bitrates, lambda x: x['mp4'], list) or []:
	71	if isinstance(format_url, compat_str):
	72	height = self._search_regex(
	73	r'(\d+)[pP]\.', format_url, 'height', default=None)
	74	formats.append({
	75	'url': format_url,
	76	'format_id': format_field(height, template='%sp'),
	77	'height': int(height),
	78	})
	79	if not formats:
	80	playlist = self._download_xml(
	81	'https://media.joj.sk/services/Video.php?clip=%s' % video_id,
	82	video_id)
	83	for file_el in playlist.findall('./files/file'):
	84	path = file_el.get('path')
	85	if not path:
	86	continue
	87	format_id = file_el.get('id') or file_el.get('label')
	88	formats.append({
	89	'url': 'http://n16.joj.sk/storage/%s' % path.replace(
	90	'dat/', '', 1),
	91	'format_id': format_id,
	92	'height': int_or_none(self._search_regex(
	93	r'(\d+)[pP]', format_id or path, 'height',
	94	default=None)),
	95	})
	96	self._sort_formats(formats)
	97
	98	thumbnail = self._og_search_thumbnail(webpage)
	99
	100	duration = int_or_none(self._search_regex(
	101	r'videoDuration\s:\s(\d+)', webpage, 'duration', fatal=False))
	102
	103	return {
	104	'id': video_id,
	105	'title': title,
	106	'thumbnail': thumbnail,
	107	'duration': duration,
	108	'formats': formats,
	109	}