]>
Commit | Line | Data |
---|---|---|
2bfeee69 YCH |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..compat import compat_chr | |
9e3c2f1d | 8 | from ..utils import ( |
594b0c4c | 9 | determine_ext, |
9e3c2f1d YCH |
10 | encode_base_n, |
11 | ExtractorError, | |
594b0c4c | 12 | mimetype2ext, |
9e3c2f1d | 13 | ) |
2bfeee69 YCH |
14 | |
15 | ||
class OpenloadIE(InfoExtractor):
    """Extractor for openload.co / openload.io file and embed pages.

    The page carries an obfuscated inline script (a variant of aadecode);
    decoding it yields JavaScript from which the direct video URL and,
    when present, the mimetype are read.
    """

    # The dot after "openload" is escaped so that it only matches a literal
    # '.' rather than any character.
    _VALID_URL = r'https://openload\.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-]+)'

    _TESTS = [{
        'url': 'https://openload.co/f/kUEfGclsU9o',
        'md5': 'bf1c059b004ebc7a256f89408e65c36e',
        'info_dict': {
            'id': 'kUEfGclsU9o',
            'ext': 'mp4',
            'title': 'skyrim_no-audio_1080.mp4',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4',
        'only_matching': True,
    }, {
        'url': 'https://openload.io/f/ZAn6oz-VZGE/',
        'only_matching': True,
    }, {
        # unavailable via https://openload.co/f/Sxz5sADo82g/, different layout
        # for title and ext
        'url': 'https://openload.co/embed/Sxz5sADo82g/',
        'only_matching': True,
    }]

    @staticmethod
    def openload_level2_debase(m):
        """Return the double-quoted string that one matched call stands for.

        Group 1 of the match ``m`` is a radix offset (27 is added to it) and
        group 2 is the number; the number is rendered in that radix with
        ``encode_base_n`` and wrapped in double quotes.
        """
        radix, num = int(m.group(1)) + 27, int(m.group(2))
        return '"' + encode_base_n(num, radix) + '"'

    @classmethod
    def openload_level2(cls, txt):
        """Expand every two-argument numeric call of the 'ǃ' function in ``txt``.

        Each call is replaced by its quoted string value, after which the
        '"+"' joints between adjacent string literals are removed so that
        concatenated pieces collapse into single literals.
        """
        # The function name is ǃ \u01c3
        # Using escaped unicode literals does not work in Python 3.2
        #
        # The pattern is compiled with re.UNICODE instead of handing the flag
        # to re.sub(): the fourth positional argument of re.sub() is `count`,
        # so passing re.UNICODE (32) there capped the number of substitutions
        # at 32 and never applied the flag.
        call_re = re.compile(r'ǃ\((\d+),(\d+)\)', re.UNICODE)
        return call_re.sub(cls.openload_level2_debase, txt).replace('"+"', '')

    # Openload uses a variant of aadecode
    # openload_decode and related functions are originally written by
    # vitas@matfyz.cz and released with public domain
    # See https://github.com/rg3/youtube-dl/issues/8489
    @classmethod
    def openload_decode(cls, txt):
        """Decode the aaencode-style script ``txt`` and return the result.

        ``txt`` is split on the character delimiter; in every piece the
        emoticon tokens are replaced by the characters they stand for. A
        piece that then starts with digits is read as an octal code point,
        one that starts with 'u' followed by hex digits as a hexadecimal
        code point; any other piece is skipped. The joined characters are
        finally passed through ``openload_level2``.
        """
        # (replacement, emoticon token) pairs, applied in this order.
        symbol_table = [
            ('_', '(゚Д゚) [゚Θ゚]'),
            ('a', '(゚Д゚) [゚ω゚ノ]'),
            ('b', '(゚Д゚) [゚Θ゚ノ]'),
            ('c', '(゚Д゚) [\'c\']'),
            ('d', '(゚Д゚) [゚ー゚ノ]'),
            ('e', '(゚Д゚) [゚Д゚ノ]'),
            ('f', '(゚Д゚) [1]'),

            ('o', '(゚Д゚) [\'o\']'),
            ('u', '(o゚ー゚o)'),

            ('7', '((゚ー゚) + (o^_^o))'),
            ('6', '((o^_^o) +(o^_^o) +(c^_^o))'),
            ('5', '((゚ー゚) + (゚Θ゚))'),
            ('4', '(-~3)'),
            ('3', '(-~-~1)'),
            ('2', '(-~1)'),
            ('1', '(-~0)'),
            ('0', '((c^_^o)-(c^_^o))'),
        ]
        delim = '(゚Д゚)[゚ε゚]+'
        decoded_chars = []
        for aachar in txt.split(delim):
            for val, pat in symbol_table:
                aachar = aachar.replace(pat, val)
            # Drop the '+ ' separators left between the substituted digits.
            aachar = aachar.replace('+ ', '')
            m = re.match(r'^\d+', aachar)
            if m:
                # Leading digits form an octal character code.
                decoded_chars.append(compat_chr(int(m.group(0), 8)))
                continue
            m = re.match(r'^u([\da-f]+)', aachar)
            if m:
                # 'u' followed by hex digits is a hexadecimal code point.
                decoded_chars.append(compat_chr(int(m.group(1), 16)))
        return cls.openload_level2(''.join(decoded_chars))

    def _real_extract(self, url):
        """Download the page for ``url`` and build its info dict.

        Raises ExtractorError (expected) when the page contains
        'File not found'.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        if 'File not found' in webpage:
            raise ExtractorError('File not found', expected=True)

        # The obfuscated script directly follows the <video> element.
        code = self._search_regex(
            r'</video>\s*</div>\s*<script[^>]+>([^<]+)</script>',
            webpage, 'JS code')

        decoded = self.openload_decode(code)

        video_url = self._search_regex(
            r'return\s+"(https?://[^"]+)"', decoded, 'video URL')

        # Title sources in order of preference: the og title, the
        # <span class="title"> element, then the description meta tag
        # (the last lookup is fatal).
        title = self._og_search_title(webpage, default=None) or self._search_regex(
            r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
            'title', default=None) or self._html_search_meta(
            'description', webpage, 'title', fatal=True)

        # Prefer the mimetype assigned to window.vt in the decoded script;
        # otherwise derive the extension from the URL, defaulting to mp4.
        ext = mimetype2ext(self._search_regex(
            r'window\.vt\s*=\s*(["\'])(?P<mimetype>.+?)\1', decoded,
            'mimetype', default=None, group='mimetype')) or determine_ext(
            video_url, 'mp4')

        return {
            'id': video_id,
            'title': title,
            'ext': ext,
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'url': video_url,
        }