[yt-dlp.git] / youtube_dl / extractor / vidzi.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    decode_packed_codes,
    js_to_json,
    NO_DEFAULT,
    PACKED_CODES_RE,
)


# Extractor for vidzi video pages (vidzi.tv / vidzi.cc / vidzi.si), covering
# both the regular page URLs and the "embed-" variants matched by _VALID_URL.
class VidziIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc|si)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
    _TESTS = [{
        'url': 'http://vidzi.tv/cghql9yq6emu.html',
        'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
        'info_dict': {
            'id': 'cghql9yq6emu',
            'ext': 'mp4',
            'title': 'youtube-dl test video  1\\\\2\'3/4<5\\\\6ä7↭',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://vidzi.tv/embed-4z2yb0rzphe9-600x338.html',
        'only_matching': True,
    }, {
        'url': 'http://vidzi.cc/cghql9yq6emu.html',
        'only_matching': True,
    }, {
        'url': 'https://vidzi.si/rph9gztxj1et.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by ``url``.

        The page is always fetched from vidzi.tv using the id captured by
        _VALID_URL, regardless of which host or URL variant was given.
        The title comes from the page's ``video-title`` heading; everything
        else comes from the first non-empty ``setup(...)`` argument found in
        the page or in one of its decoded packed scripts.
        """
        video_id = self._match_id(url)

        page_url = 'http://vidzi.tv/%s' % video_id
        webpage = self._download_webpage(page_url, video_id)
        title = self._html_search_regex(
            r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')

        def _to_json(source):
            # Drop "+ window[...]" concatenations before converting the
            # JavaScript object literal to JSON.
            return js_to_json(re.sub(r'\s*\+\s*window\[.+?\]', '', source))

        # Candidate sources, in search order: the raw page first, then every
        # packed script found in it, decoded (all decoded up front).
        candidates = [webpage]
        for packed in re.finditer(PACKED_CODES_RE, webpage):
            unpacked = decode_packed_codes(packed.group(0))
            candidates.append(unpacked.replace('\\\'', '\''))

        last_index = len(candidates) - 1
        for index, candidate in enumerate(candidates):
            # Only the last candidate is searched with NO_DEFAULT; earlier
            # ones fall back to an empty object so the loop moves on.
            # NOTE(review): NO_DEFAULT presumably makes _search_regex raise
            # when nothing matches - confirm against the helper.
            fallback = NO_DEFAULT if index == last_index else '{}'
            setup_args = self._search_regex(
                r'setup\(([^)]+)\)', candidate, 'jwplayer data',
                default=fallback)
            jwplayer_data = self._parse_json(
                setup_args, video_id, transform_source=_to_json)
            if jwplayer_data:
                break

        result = self._parse_jwplayer_data(
            jwplayer_data, video_id, require_title=False)
        # The title taken from the page heading always overrides whatever
        # the player data produced.
        result['title'] = title
        return result
Commit	Line	Data
5f6a1245	1	# coding: utf-8
018e8355 PH	2	from __future__ import unicode_literals
018e8355 PH	3
2b96b06b S	4	import re
2b96b06b S	5
a4a554a7	6	from .common import InfoExtractor
8f4a2124	7	from ..utils import (
efbd6fb8	8	decode_packed_codes,
8f4a2124	9	js_to_json,
2b96b06b S	10	NO_DEFAULT,
2b96b06b S	11	PACKED_CODES_RE,
8f4a2124	12	)
018e8355	13
8f4a2124	14
a4a554a7	15	class VidziIE(InfoExtractor):
c01db237	16	_VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv\|cc\|si)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
5c4dcf81	17	_TESTS = [{
2c26df76 PH	18	'url': 'http://vidzi.tv/cghql9yq6emu.html',
2c26df76 PH	19	'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
95ee8442	20	'info_dict': {
2c26df76	21	'id': 'cghql9yq6emu',
95ee8442	22	'ext': 'mp4',
2c26df76	23	'title': 'youtube-dl test video 1\\\\2\'3/4<5\\\\6ä7↭',
bd93a12e YCH	24	},
	25	'params': {
	26	# m3u8 download
	27	'skip_download': True,
95ee8442	28	},
5c4dcf81 S	29	}, {
5c4dcf81 S	30	'url': 'http://vidzi.tv/embed-4z2yb0rzphe9-600x338.html',
7512aa98	31	'only_matching': True,
58ad6995 S	32	}, {
58ad6995 S	33	'url': 'http://vidzi.cc/cghql9yq6emu.html',
7512aa98	34	'only_matching': True,
c01db237 S	35	}, {
	36	'url': 'https://vidzi.si/rph9gztxj1et.html',
	37	'only_matching': True,
5c4dcf81	38	}]
95ee8442	39
95ee8442	40	def _real_extract(self, url):
018e8355	41	video_id = self._match_id(url)
5f6a1245	42
5c4dcf81 S	43	webpage = self._download_webpage(
5c4dcf81 S	44	'http://vidzi.tv/%s' % video_id, video_id)
018e8355	45	title = self._html_search_regex(
2c26df76	46	r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
5f6a1245	47
f7799582 S	48	codes = [webpage]
	49	codes.extend([
	50	decode_packed_codes(mobj.group(0)).replace('\\\'', '\'')
	51	for mobj in re.finditer(PACKED_CODES_RE, webpage)])
	52	for num, code in enumerate(codes, 1):
2b96b06b S	53	jwplayer_data = self._parse_json(
	54	self._search_regex(
	55	r'setup\(([^)]+)\)', code, 'jwplayer data',
f7799582	56	default=NO_DEFAULT if num == len(codes) else '{}'),
81c5df4f U	57	video_id, transform_source=lambda s: js_to_json(
81c5df4f U	58	re.sub(r'\s\+\swindow\[.+?\]', '', s)))
2b96b06b S	59	if jwplayer_data:
2b96b06b S	60	break
8f4a2124 YCH	61
	62	info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False)
	63	info_dict['title'] = title
	64
	65	return info_dict