[yt-dlp.git] / yt_dlp / extractor / joj.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    format_field,
    int_or_none,
    js_to_json,
    try_get,
)


class JojIE(InfoExtractor):
    """Extractor for videos served from the media.joj.sk embed player.

    Accepts either a full ``https://media.joj.sk/embed/<id>`` URL or the
    internal ``joj:<id>`` shorthand; both are resolved against the embed page.
    """

    _VALID_URL = r'''(?x)
                    (?:
                        joj:|
                        https?://media\.joj\.sk/embed/
                    )
                    (?P<id>[^/?#^]+)
                '''
    _TESTS = [{
        'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
        'info_dict': {
            'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932',
            'ext': 'mp4',
            'title': 'NOVÉ BÝVANIE',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 3118,
        }
    }, {
        'url': 'https://media.joj.sk/embed/9i1cxv',
        'only_matching': True,
    }, {
        'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932',
        'only_matching': True,
    }, {
        'url': 'joj:9i1cxv',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` of every media.joj.sk embed iframe in *webpage*.

        Both absolute (``https://...``) and protocol-relative (``//...``)
        iframe sources are matched; the URLs are returned as found, in
        document order.
        """
        return [
            mobj.group('url')
            for mobj in re.finditer(
                r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//media\.joj\.sk/embed/(?:(?!\1).)+)\1',
                webpage)]

    def _real_extract(self, url):
        """Build the info dict for the video identified by *url*.

        Formats are taken from the ``src``/``bitrates`` JavaScript object on
        the embed page; when that yields nothing, the XML playlist service is
        queried instead.
        """
        video_id = self._match_id(url)

        # Always fetch the canonical embed page, regardless of which URL
        # form (joj: shorthand or full embed URL) was supplied.
        webpage = self._download_webpage(
            'https://media.joj.sk/embed/%s' % video_id, video_id)

        # Prefer the player's own title, then <title>, then OpenGraph.
        title = self._search_regex(
            (r'videoTitle\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
             r'<title>(?P<title>[^<]+)'), webpage, 'title',
            default=None, group='title') or self._og_search_title(webpage)

        # Non-fatal: a missing or unparsable object just leaves us with no
        # formats here and triggers the XML fallback below.
        bitrates = self._parse_json(
            self._search_regex(
                r'(?s)(?:src|bitrates)\s*=\s*({.+?});', webpage, 'bitrates',
                default='{}'),
            video_id, transform_source=js_to_json, fatal=False)

        formats = []
        for format_url in try_get(bitrates, lambda x: x['mp4'], list) or []:
            if not isinstance(format_url, compat_str):
                continue
            height = self._search_regex(
                r'(\d+)[pP]\.', format_url, 'height', default=None)
            formats.append({
                'url': format_url,
                'format_id': format_field(height, template='%sp'),
                # The URL may carry no "<height>p." marker, in which case
                # height is None; int() would raise TypeError and abort the
                # whole extraction, so convert leniently instead.
                'height': int_or_none(height),
            })
        if not formats:
            # Fallback: the XML playlist service lists files by storage path.
            playlist = self._download_xml(
                'https://media.joj.sk/services/Video.php?clip=%s' % video_id,
                video_id)
            for file_el in playlist.findall('./files/file'):
                path = file_el.get('path')
                if not path:
                    continue
                format_id = file_el.get('id') or file_el.get('label')
                formats.append({
                    # Only the first 'dat/' occurrence is dropped from the
                    # path before it is appended to the storage host.
                    'url': 'http://n16.joj.sk/storage/%s' % path.replace(
                        'dat/', '', 1),
                    'format_id': format_id,
                    'height': int_or_none(self._search_regex(
                        r'(\d+)[pP]', format_id or path, 'height',
                        default=None)),
                })
        self._sort_formats(formats)

        thumbnail = self._og_search_thumbnail(webpage)

        # Duration is optional metadata; a missing value must not be fatal.
        duration = int_or_none(self._search_regex(
            r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }
Commit	Line	Data
cefecac1 U	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import re
	5
	6	from .common import InfoExtractor
	7	from ..compat import compat_str
	8	from ..utils import (
e0ddbd02	9	format_field,
cefecac1 U	10	int_or_none,
	11	js_to_json,
	12	try_get,
	13	)
	14
	15
	16	class JojIE(InfoExtractor):
	17	_VALID_URL = r'''(?x)
	18	(?:
	19	joj:\|
	20	https?://media\.joj\.sk/embed/
	21	)
	22	(?P<id>[^/?#^]+)
	23	'''
	24	_TESTS = [{
	25	'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
	26	'info_dict': {
	27	'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932',
	28	'ext': 'mp4',
	29	'title': 'NOVÉ BÝVANIE',
	30	'thumbnail': r're:^https?://.*\.jpg$',
	31	'duration': 3118,
	32	}
	33	}, {
	34	'url': 'https://media.joj.sk/embed/9i1cxv',
	35	'only_matching': True,
	36	}, {
	37	'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932',
	38	'only_matching': True,
	39	}, {
	40	'url': 'joj:9i1cxv',
	41	'only_matching': True,
	42	}]
	43
	44	@staticmethod
	45	def _extract_urls(webpage):
	46	return [
	47	mobj.group('url')
	48	for mobj in re.finditer(
	49	r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//media\.joj\.sk/embed/(?:(?!\1).)+)\1',
	50	webpage)]
	51
	52	def _real_extract(self, url):
	53	video_id = self._match_id(url)
	54
	55	webpage = self._download_webpage(
	56	'https://media.joj.sk/embed/%s' % video_id, video_id)
	57
	58	title = self._search_regex(
	59	(r'videoTitle\s:\s(["\'])(?P<title>(?:(?!\1).)+)\1',
	60	r'<title>(?P<title>[^<]+)'), webpage, 'title',
	61	default=None, group='title') or self._og_search_title(webpage)
	62
	63	bitrates = self._parse_json(
	64	self._search_regex(
	65	r'(?s)(?:src\|bitrates)\s=\s({.+?});', webpage, 'bitrates',
	66	default='{}'),
	67	video_id, transform_source=js_to_json, fatal=False)
	68
	69	formats = []
	70	for format_url in try_get(bitrates, lambda x: x['mp4'], list) or []:
	71	if isinstance(format_url, compat_str):
	72	height = self._search_regex(
	73	r'(\d+)[pP]\.', format_url, 'height', default=None)
74	formats.append({
75	'url': format_url,
e0ddbd02	76	'format_id': format_field(height, template='%sp'),
cefecac1 U	77	'height': int(height),
	78	})
	79	if not formats:
	80	playlist = self._download_xml(
	81	'https://media.joj.sk/services/Video.php?clip=%s' % video_id,
	82	video_id)
	83	for file_el in playlist.findall('./files/file'):
	84	path = file_el.get('path')
	85	if not path:
	86	continue
	87	format_id = file_el.get('id') or file_el.get('label')
	88	formats.append({
	89	'url': 'http://n16.joj.sk/storage/%s' % path.replace(
	90	'dat/', '', 1),
	91	'format_id': format_id,
	92	'height': int_or_none(self._search_regex(
	93	r'(\d+)[pP]', format_id or path, 'height',
	94	default=None)),
	95	})
	96	self._sort_formats(formats)
	97
	98	thumbnail = self._og_search_thumbnail(webpage)
	99
	100	duration = int_or_none(self._search_regex(
	101	r'videoDuration\s:\s(\d+)', webpage, 'duration', fatal=False))
	102
	103	return {
	104	'id': video_id,
	105	'title': title,
	106	'thumbnail': thumbnail,
	107	'duration': duration,
	108	'formats': formats,
	109	}