[yt-dlp.git] / youtube_dl / extractor / vshare.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_chr
from ..utils import (
    decode_packed_codes,
    ExtractorError,
)


class VShareIE(InfoExtractor):
    """Information extractor for videos hosted on vshare.io.

    Both the ``/d/<id>`` and ``/v/<id>`` URL forms are accepted; extraction
    always goes through the ``/v/<id>/width-650/height-430/1`` page, whose
    packed JavaScript carries the ``<source>`` tags in obfuscated form.
    """

    _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://vshare.io/d/0f64ce6',
        'md5': '17b39f55b5497ae8b59f5fbce8e35886',
        'info_dict': {
            'id': '0f64ce6',
            'title': 'vl14062007715967',
            'ext': 'mp4',
        }
    }, {
        'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
        'only_matching': True,
    }]

    def _extract_packed(self, webpage):
        """Return the markup hidden inside the page's packed JavaScript.

        The ``eval(function...`` blob found in *webpage* is unpacked with
        ``decode_packed_codes``.  The unpacked script holds a bracketed list
        of integers and a ``fromCharCode(...)`` call ending in a numeric
        key; subtracting that key from every integer gives the character
        codes of the decoded text.

        Any of the three regex lookups aborts extraction (via
        ``_search_regex``) when its pattern is not found.
        """
        packed = self._search_regex(
            r'(eval\(function.+)', webpage, 'packed code')
        unpacked = decode_packed_codes(packed)

        # Accepts exactly the same lists as the former
        # r'\[((?:\d+,?)+)\]' (digit runs separated by single commas, with
        # an optional trailing comma), but without the nested quantifier
        # that could backtrack exponentially on an unterminated list.  The
        # trailing comma stays outside the group, so split(',') can no
        # longer yield an empty item that int() would choke on.
        digits = self._search_regex(
            r'\[(\d+(?:,\d+)*),?\]', unpacked, 'digits')
        key = int(self._search_regex(
            r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit'))

        return ''.join(
            compat_chr(int(digit) - key) for digit in digits.split(','))

    def _real_extract(self, url):
        """Download the player page for *url* and build its info dict.

        Returns a dict with ``id``, ``title`` and ``formats`` (one entry per
        ``<source src="...">`` found in the decoded markup).

        Raises ``ExtractorError`` (``expected=True``) carrying the site's
        own message when the page contains an ``xxx-error`` block.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'https://vshare.io/v/%s/width-650/height-430/1' % video_id, video_id)

        # Look for the site's error block before anything else: the title
        # lookup below is fatal, so running it first could hide the real
        # reason behind an "unable to extract title" failure.
        error = self._html_search_regex(
            r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage,
            'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)

        title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
        # Keep only the part of the page title before the first ' - '.
        title = title.split(' - ')[0]

        unpacked = self._extract_packed(webpage)
        video_urls = re.findall(r'<source src="([^"]+)', unpacked)
        formats = [{'url': video_url} for video_url in video_urls]
        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }

    @staticmethod
    def _extract_urls(webpage):
        """Return the vshare.io ``/v/<id>`` URLs of iframes in *webpage*.

        Scheme-relative (``//vshare.io/...``) sources are matched too and
        are returned as written, without a scheme.  The result is a list of
        strings, empty when no such iframe is present.
        """
        return re.findall(
            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)',
            webpage)
Commit	Line	Data
2ab0bfcd S	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
0987f2dd T	4	import re
0987f2dd T	5
2ab0bfcd	6	from .common import InfoExtractor
0987f2dd	7	from ..compat import compat_chr
ff31f2d5 S	8	from ..utils import (
	9	decode_packed_codes,
	10	ExtractorError,
	11	)
2ab0bfcd S	12
	13
	14	class VShareIE(InfoExtractor):
	15	_VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
	16	_TESTS = [{
	17	'url': 'https://vshare.io/d/0f64ce6',
0987f2dd	18	'md5': '17b39f55b5497ae8b59f5fbce8e35886',
2ab0bfcd S	19	'info_dict': {
	20	'id': '0f64ce6',
	21	'title': 'vl14062007715967',
	22	'ext': 'mp4',
	23	}
	24	}, {
	25	'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
	26	'only_matching': True,
	27	}]
	28
0987f2dd T	29	def _extract_packed(self, webpage):
	30	packed = self._search_regex(r'(eval\(function.+)', webpage, 'packed code')
	31	unpacked = decode_packed_codes(packed)
	32	digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits')
	33	digits = digits.split(',')
	34	digits = [int(digit) for digit in digits]
	35	key_digit = self._search_regex(r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit')
	36	chars = [compat_chr(d - int(key_digit)) for d in digits]
	37	return ''.join(chars)
	38
2ab0bfcd S	39	def _real_extract(self, url):
	40	video_id = self._match_id(url)
	41
	42	webpage = self._download_webpage(
0987f2dd	43	'https://vshare.io/v/%s/width-650/height-430/1' % video_id, video_id)
2ab0bfcd	44
0987f2dd T	45	title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
0987f2dd T	46	title = title.split(' - ')[0]
2ab0bfcd	47
ff31f2d5 S	48	error = self._html_search_regex(
	49	r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage,
	50	'error', default=None)
	51	if error:
	52	raise ExtractorError(error, expected=True)
	53
0987f2dd T	54	unpacked = self._extract_packed(webpage)
	55	video_urls = re.findall(r'<source src="([^"]+)', unpacked)
	56	formats = [{'url': video_url} for video_url in video_urls]
2ab0bfcd S	57	return {
	58	'id': video_id,
	59	'title': title,
0987f2dd	60	'formats': formats,
2ab0bfcd	61	}
0987f2dd T	62
	63	@staticmethod
	64	def _extract_urls(webpage):
	65	return re.findall(
	66	r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)',
	67	webpage)