[yt-dlp.git] / yt_dlp / extractor / vshare.py

from .common import InfoExtractor
from ..utils import ExtractorError, decode_packed_codes


class VShareIE(InfoExtractor):
    """Information extractor for videos hosted on vshare.io.

    Both the ``/d/<id>`` and ``/v/<id>`` URL forms are accepted. In either
    case the ``/v/<id>/width-650/height-430/1`` page is downloaded and the
    media markup hidden inside its packed JavaScript is decoded.
    """

    _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
    _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)']
    _TESTS = [{
        'url': 'https://vshare.io/d/0f64ce6',
        'md5': '17b39f55b5497ae8b59f5fbce8e35886',
        'info_dict': {
            'id': '0f64ce6',
            'title': 'vl14062007715967',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
        'only_matching': True,
    }]

    def _extract_packed(self, webpage):
        """Decode the obfuscated markup embedded in *webpage*.

        The page is expected to contain an ``eval(function...`` packed
        script. After unpacking, the script holds a bracketed list of
        comma-separated integers and a ``fromCharCode(...)`` call whose
        trailing number is the offset that must be subtracted from every
        integer to obtain the real character code.

        Returns the decoded text as a single string; the caller wraps it in
        a ``<video>`` element. None of the regex lookups supply a default,
        so a page lacking the expected script is presumably reported as an
        extraction failure by the base class helper (confirm against
        ``InfoExtractor._search_regex``).
        """
        packed = self._search_regex(
            r'(eval\(function.+)', webpage, 'packed code')
        unpacked = decode_packed_codes(packed)

        # Shifted character codes, e.g. "[123,456,...]".
        digits = self._search_regex(r'\[([\d,]+)\]', unpacked, 'digits')
        # Offset applied to every code before it is turned into a character.
        key_digit = int(self._search_regex(
            r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit'))

        return ''.join(
            chr(int(digit) - key_digit) for digit in digits.split(','))

    def _real_extract(self, url):
        """Extract the info dict for the video referenced by *url*.

        Raises ExtractorError (marked as expected) with the site's own
        message when the page contains an ``xxx-error`` block.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
            video_id, headers={'Referer': url})

        # Look for a site-reported error before touching anything else, so
        # that the real reason is surfaced instead of being masked by a
        # failure while parsing a title the error page may not have.
        error = self._html_search_regex(
            r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage,
            'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)

        # The title lookup may yield nothing (presumably None when the page
        # has no <title>; confirm against InfoExtractor._html_extract_title).
        # Only strip the " - <site name>" suffix when there is a title.
        title = self._html_extract_title(webpage)
        if title:
            title = title.split(' - ')[0]

        # The decoded payload is the inner markup of a <video> element;
        # wrap it so the generic HTML5 media parser can pick up the formats.
        info = self._parse_html5_media_entries(
            url, '<video>%s</video>' % self._extract_packed(webpage),
            video_id)[0]

        info.update({
            'id': video_id,
            'title': title,
        })

        return info
Commit	Line	Data
2ab0bfcd	1	from .common import InfoExtractor
ac668111	2	from ..utils import ExtractorError, decode_packed_codes
2ab0bfcd S	3
	4
	5	class VShareIE(InfoExtractor):
	6	_VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
bfd973ec	7	_EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)']
2ab0bfcd S	8	_TESTS = [{
2ab0bfcd S	9	'url': 'https://vshare.io/d/0f64ce6',
0987f2dd	10	'md5': '17b39f55b5497ae8b59f5fbce8e35886',
2ab0bfcd S	11	'info_dict': {
	12	'id': '0f64ce6',
	13	'title': 'vl14062007715967',
	14	'ext': 'mp4',
	15	}
	16	}, {
	17	'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
	18	'only_matching': True,
	19	}]
	20
0987f2dd	21	def _extract_packed(self, webpage):
a2b6aba8 S	22	packed = self._search_regex(
a2b6aba8 S	23	r'(eval\(function.+)', webpage, 'packed code')
0987f2dd	24	unpacked = decode_packed_codes(packed)
337734d4	25	digits = self._search_regex(r'\[([\d,]+)\]', unpacked, 'digits')
a2b6aba8 S	26	digits = [int(digit) for digit in digits.split(',')]
	27	key_digit = self._search_regex(
	28	r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit')
ac668111	29	chars = [chr(d - int(key_digit)) for d in digits]
0987f2dd T	30	return ''.join(chars)
0987f2dd T	31
2ab0bfcd S	32	def _real_extract(self, url):
	33	video_id = self._match_id(url)
	34
	35	webpage = self._download_webpage(
a2b6aba8	36	'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
794c1b6e	37	video_id, headers={'Referer': url})
2ab0bfcd	38
04f3fd2c	39	title = self._html_extract_title(webpage)
0987f2dd	40	title = title.split(' - ')[0]
2ab0bfcd	41
ff31f2d5 S	42	error = self._html_search_regex(
	43	r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage,
	44	'error', default=None)
	45	if error:
	46	raise ExtractorError(error, expected=True)
	47
a2b6aba8 S	48	info = self._parse_html5_media_entries(
	49	url, '<video>%s</video>' % self._extract_packed(webpage),
	50	video_id)[0]
	51
a2b6aba8	52	info.update({
2ab0bfcd S	53	'id': video_id,
2ab0bfcd S	54	'title': title,
a2b6aba8	55	})
0987f2dd	56
a2b6aba8	57	return info