[yt-dlp.git] / youtube_dl / extractor / vidzi.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .jwplatform import JWPlatformBaseIE
from ..utils import (
    decode_packed_codes,
    js_to_json,
    NO_DEFAULT,
    PACKED_CODES_RE,
)


class VidziIE(JWPlatformBaseIE):
    """Information extractor for vidzi.tv video pages and embed URLs."""

    # The video id is the alphanumeric token right after the host; an
    # optional "embed-" prefix is skipped and whatever follows the token
    # (e.g. "-600x338.html") is simply not part of the match.
    _VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
    _TESTS = [{
        'url': 'http://vidzi.tv/cghql9yq6emu.html',
        'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
        'info_dict': {
            'id': 'cghql9yq6emu',
            'ext': 'mp4',
            'title': 'youtube-dl test video  1\\\\2\'3/4<5\\\\6ä7↭',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # Embed URL form. 'skip_download' is only meaningful inside
        # 'params', so this entry is declared as a URL-matching test.
        'url': 'http://vidzi.tv/embed-4z2yb0rzphe9-600x338.html',
        'only_matching': True,
    }]

    def _vidzi_setup_data(self, code, video_id, default=NO_DEFAULT):
        """Parse the argument of the first ``setup(...)`` call found in *code*.

        The captured text is converted with ``js_to_json`` and parsed via
        ``self._parse_json``. *default* is handed to ``self._search_regex``
        unchanged: pass ``'{}'`` to get an empty dict when *code* contains no
        ``setup(...)`` call, or leave it as ``NO_DEFAULT`` so that
        ``_search_regex`` handles the missing match itself (presumably by
        raising - confirm against InfoExtractor).
        """
        return self._parse_json(
            self._search_regex(
                r'setup\(([^)]+)\)', code, 'jwplayer data', default=default),
            video_id, transform_source=js_to_json)

    def _real_extract(self, url):
        """Return the info dict for the vidzi.tv video addressed by *url*.

        The page is always fetched from the canonical
        ``http://vidzi.tv/<id>`` address, also for embed URLs. The title comes
        from the ``<h2 class="video-title">`` element; everything else is
        produced by ``self._parse_jwplayer_data`` from the JWPlayer
        ``setup(...)`` argument.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'http://vidzi.tv/%s' % video_id, video_id)
        title = self._html_search_regex(
            r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')

        # Pre-bind the result so that a page without any packed script no
        # longer ends in an UnboundLocalError further down.
        jwplayer_data = None
        for mobj in re.finditer(PACKED_CODES_RE, webpage):
            # Decode lazily, one script at a time, and un-escape \' so the
            # setup() argument can go through js_to_json.
            code = decode_packed_codes(mobj.group(0)).replace('\\\'', '\'')
            # A packed script without setup() yields {} and is skipped.
            jwplayer_data = self._vidzi_setup_data(
                code, video_id, default='{}')
            if jwplayer_data:
                break

        if not jwplayer_data:
            # Last resort: look for an unpacked setup() call in the raw page.
            # No default is given here, so a page with no usable player
            # config is reported by _search_regex instead of crashing.
            jwplayer_data = self._vidzi_setup_data(webpage, video_id)

        info_dict = self._parse_jwplayer_data(
            jwplayer_data, video_id, require_title=False)
        # The title scraped from the page overrides whatever the player
        # config provided.
        info_dict['title'] = title

        return info_dict
Commit	Line	Data
5f6a1245	1	# coding: utf-8
018e8355 PH	2	from __future__ import unicode_literals
018e8355 PH	3
2b96b06b S	4	import re
2b96b06b S	5
8f4a2124 YCH	6	from .jwplatform import JWPlatformBaseIE
8f4a2124 YCH	7	from ..utils import (
efbd6fb8	8	decode_packed_codes,
8f4a2124	9	js_to_json,
2b96b06b S	10	NO_DEFAULT,
2b96b06b S	11	PACKED_CODES_RE,
8f4a2124	12	)
018e8355	13
8f4a2124 YCH	14
8f4a2124 YCH	15	class VidziIE(JWPlatformBaseIE):
5c4dcf81 S	16	_VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
5c4dcf81 S	17	_TESTS = [{
2c26df76 PH	18	'url': 'http://vidzi.tv/cghql9yq6emu.html',
2c26df76 PH	19	'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
95ee8442	20	'info_dict': {
2c26df76	21	'id': 'cghql9yq6emu',
95ee8442	22	'ext': 'mp4',
2c26df76	23	'title': 'youtube-dl test video 1\\\\2\'3/4<5\\\\6ä7↭',
bd93a12e YCH	24	},
	25	'params': {
	26	# m3u8 download
	27	'skip_download': True,
95ee8442	28	},
5c4dcf81 S	29	}, {
	30	'url': 'http://vidzi.tv/embed-4z2yb0rzphe9-600x338.html',
	31	'skip_download': True,
	32	}]
95ee8442	33
95ee8442	34	def _real_extract(self, url):
018e8355	35	video_id = self._match_id(url)
5f6a1245	36
5c4dcf81 S	37	webpage = self._download_webpage(
5c4dcf81 S	38	'http://vidzi.tv/%s' % video_id, video_id)
018e8355	39	title = self._html_search_regex(
2c26df76	40	r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
5f6a1245	41
2b96b06b S	42	packed_codes = [mobj.group(0) for mobj in re.finditer(
	43	PACKED_CODES_RE, webpage)]
	44	for num, pc in enumerate(packed_codes, 1):
	45	code = decode_packed_codes(pc).replace('\\\'', '\'')
	46	jwplayer_data = self._parse_json(
	47	self._search_regex(
	48	r'setup\(([^)]+)\)', code, 'jwplayer data',
	49	default=NO_DEFAULT if num == len(packed_codes) else '{}'),
	50	video_id, transform_source=js_to_json)
	51	if jwplayer_data:
	52	break
8f4a2124 YCH	53
	54	info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False)
	55	info_dict['title'] = title
	56
	57	return info_dict