]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/vshare.py
[vshare] Capture and output error message
[yt-dlp.git] / youtube_dl / extractor / vshare.py
CommitLineData
2ab0bfcd
S
1# coding: utf-8
2from __future__ import unicode_literals
3
0987f2dd
T
4import re
5
2ab0bfcd 6from .common import InfoExtractor
0987f2dd 7from ..compat import compat_chr
ff31f2d5
S
8from ..utils import (
9 decode_packed_codes,
10 ExtractorError,
11)
2ab0bfcd
S
12
13
class VShareIE(InfoExtractor):
    """Information extractor for videos hosted on vshare.io.

    Both the ``/d/<id>`` and ``/v/<id>`` URL forms are accepted; the video
    page itself is always fetched through the ``/v/`` player URL.
    """

    _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://vshare.io/d/0f64ce6',
        'md5': '17b39f55b5497ae8b59f5fbce8e35886',
        'info_dict': {
            'id': '0f64ce6',
            'title': 'vl14062007715967',
            'ext': 'mp4',
        }
    }, {
        'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
        'only_matching': True,
    }]

    def _extract_packed(self, webpage):
        """Decode the obfuscated markup embedded in the player page.

        The page carries an ``eval(function...`` script which is passed
        through ``decode_packed_codes``.  The result contains a bracketed,
        comma-separated list of integers and a ``fromCharCode(...)`` call
        ending in a numeric key; subtracting the key from every integer
        yields the character codes of the hidden text.

        :param webpage: HTML of the player page.
        :return: the decoded text as a single string.
        """
        packed = self._search_regex(
            r'(eval\(function.+)', webpage, 'packed code')
        unpacked = decode_packed_codes(packed)
        digits = self._search_regex(
            r'\[((?:\d+,?)+)\]', unpacked, 'digits')
        # Convert the key once instead of once per decoded character.
        key = int(self._search_regex(
            r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit'))
        # The pattern above also accepts a trailing comma ("1,2,3,").
        # Splitting on ',' would then produce an empty string and make
        # int() fail, so collect the numbers themselves instead.
        return ''.join(
            compat_chr(int(digit) - key)
            for digit in re.findall(r'\d+', digits))

    def _real_extract(self, url):
        """Build the info dict (id, title, formats) for a vshare.io URL.

        :param url: a URL matching ``_VALID_URL``.
        :return: dict with ``id``, ``title`` and ``formats`` keys.
        :raises ExtractorError: with ``expected=True`` when the page shows
            an ``xxx-error`` block; its text becomes the error message.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
            video_id)

        # Look for a site-reported error before anything that can fail
        # fatally, so a missing <title> on an error page cannot hide the
        # message the site actually gave.
        error = self._html_search_regex(
            r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage,
            'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)

        title = self._html_search_regex(
            r'<title>([^<]+)</title>', webpage, 'title')
        # Keep only the part of the page title before the first ' - '.
        title = title.split(' - ')[0]

        # The <source> tags exist only inside the decoded packed script.
        unpacked = self._extract_packed(webpage)
        formats = [
            {'url': video_url}
            for video_url in re.findall(r'<source src="([^"]+)', unpacked)]

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }

    @staticmethod
    def _extract_urls(webpage):
        """Return vshare.io ``/v/<id>`` embed URLs found in iframes.

        Each match runs from the optional scheme up to the end of the video
        id, so the results may be protocol-relative (``//vshare.io/v/...``).

        :param webpage: HTML to scan for ``<iframe>`` tags.
        :return: list of matched URL strings (empty when none are found).
        """
        return re.findall(
            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)',
            webpage)