[yt-dlp.git] / yt_dlp / extractor / vshare.py

import re

from .common import InfoExtractor
from ..compat import compat_chr
from ..utils import (
    decode_packed_codes,
    ExtractorError,
)


class VShareIE(InfoExtractor):
    """Extractor for vshare.io pages (``/d/<id>`` and ``/v/<id>`` URLs)."""

    _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://vshare.io/d/0f64ce6',
        'md5': '17b39f55b5497ae8b59f5fbce8e35886',
        'info_dict': {
            'id': '0f64ce6',
            'title': 'vl14062007715967',
            'ext': 'mp4',
        }
    }, {
        'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the vshare.io embed URLs found in ``<iframe src=...>`` tags.

        Each returned URL is truncated right after the video ID
        (``[scheme:]//[www.]vshare.io/v/<id>``).
        """
        # Quotes are excluded from the ID so that an embed whose src ends
        # directly after the ID (src="//vshare.io/v/abc" width=...) does not
        # swallow the closing quote and the attributes that follow it.
        return re.findall(
            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&"\']+)',
            webpage)

    def _extract_packed(self, webpage):
        """Decode the obfuscated markup carried by the packed script in *webpage*.

        The page contains an ``eval(function...`` packed script. Once
        unpacked it holds a bracketed list of integers and a
        ``fromCharCode(... <key>)}`` expression; every integer minus ``<key>``
        is the code point of one character of the result.

        Returns the decoded text as a string.
        """
        packed = self._search_regex(
            r'(eval\(function.+)', webpage, 'packed code')
        unpacked = decode_packed_codes(packed)
        digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits')
        # Convert the key once instead of once per character.
        key = int(self._search_regex(
            r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit'))
        # The digits pattern tolerates a trailing comma ("1,2,"); splitting on
        # ',' would then produce an empty token and int('') would fail, so
        # pull out the numbers directly.
        return ''.join(
            compat_chr(int(digit) - key)
            for digit in re.findall(r'\d+', digits))

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        Downloads the embed page for the video ID, reports the site's own
        error message if one is present, otherwise decodes the packed player
        markup and parses it as an HTML5 ``<video>`` element.

        Raises ExtractorError when the page shows an error block or when the
        decoded markup yields no media entry.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
            video_id, headers={'Referer': url})

        # Check for the site's error block before touching anything else so
        # that an error page is reported with its own message instead of
        # failing on a missing title or missing packed code.
        error = self._html_search_regex(
            r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage,
            'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)

        # NOTE(review): the title lookup is presumably non-fatal and may
        # return None when the page has no <title>; guard the split so that
        # case does not end in an AttributeError.
        title = self._html_extract_title(webpage)
        if title:
            title = title.split(' - ')[0]

        entries = self._parse_html5_media_entries(
            url, '<video>%s</video>' % self._extract_packed(webpage),
            video_id)
        if not entries:
            # Avoid a bare IndexError when the decoded markup has no sources.
            raise ExtractorError(
                'Unable to find any media in the decoded player markup')
        info = entries[0]

        self._sort_formats(info['formats'])

        info.update({
            'id': video_id,
            'title': title,
        })

        return info
Commit	Line	Data
0987f2dd T	1	import re
0987f2dd T	2
2ab0bfcd	3	from .common import InfoExtractor
0987f2dd	4	from ..compat import compat_chr
ff31f2d5 S	5	from ..utils import (
	6	decode_packed_codes,
	7	ExtractorError,
	8	)
2ab0bfcd S	9
	10
	11	class VShareIE(InfoExtractor):
	12	_VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
	13	_TESTS = [{
	14	'url': 'https://vshare.io/d/0f64ce6',
0987f2dd	15	'md5': '17b39f55b5497ae8b59f5fbce8e35886',
2ab0bfcd S	16	'info_dict': {
	17	'id': '0f64ce6',
	18	'title': 'vl14062007715967',
	19	'ext': 'mp4',
	20	}
	21	}, {
	22	'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
	23	'only_matching': True,
	24	}]
	25
a2b6aba8 S	26	@staticmethod
	27	def _extract_urls(webpage):
	28	return re.findall(
	29	r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)',
	30	webpage)
	31
0987f2dd	32	def _extract_packed(self, webpage):
a2b6aba8 S	33	packed = self._search_regex(
a2b6aba8 S	34	r'(eval\(function.+)', webpage, 'packed code')
0987f2dd T	35	unpacked = decode_packed_codes(packed)
0987f2dd T	36	digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits')
a2b6aba8 S	37	digits = [int(digit) for digit in digits.split(',')]
	38	key_digit = self._search_regex(
	39	r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit')
0987f2dd T	40	chars = [compat_chr(d - int(key_digit)) for d in digits]
	41	return ''.join(chars)
	42
2ab0bfcd S	43	def _real_extract(self, url):
	44	video_id = self._match_id(url)
	45
	46	webpage = self._download_webpage(
a2b6aba8	47	'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
794c1b6e	48	video_id, headers={'Referer': url})
2ab0bfcd	49
04f3fd2c	50	title = self._html_extract_title(webpage)
0987f2dd	51	title = title.split(' - ')[0]
2ab0bfcd	52
ff31f2d5 S	53	error = self._html_search_regex(
	54	r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage,
	55	'error', default=None)
	56	if error:
	57	raise ExtractorError(error, expected=True)
	58
a2b6aba8 S	59	info = self._parse_html5_media_entries(
	60	url, '<video>%s</video>' % self._extract_packed(webpage),
	61	video_id)[0]
	62
	63	self._sort_formats(info['formats'])
	64
	65	info.update({
2ab0bfcd S	66	'id': video_id,
2ab0bfcd S	67	'title': title,
a2b6aba8	68	})
0987f2dd	69
a2b6aba8	70	return info