]>
Commit | Line | Data |
---|---|---|
4f5f18ac PH |
1 | import re |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..utils import ( | |
5 | compat_HTTPError, | |
6 | compat_str, | |
7 | compat_urllib_parse, | |
8 | compat_urllib_parse_urlparse, | |
9 | ||
10 | ExtractorError, | |
11 | ) | |
12 | ||
13 | ||
class AddAnimeIE(InfoExtractor):
    """Extractor for add-anime.net ``watch_video.php`` pages.

    The video id is taken from the ``v`` query parameter of the URL.  When
    the first page download fails with HTTP 503, the body of the error
    response is treated as a challenge page: its form is answered once and
    the original page is requested again.
    """

    _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
    IE_NAME = u'AddAnime'
    _TEST = {
        u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
        u'file': u'24MR3YO5SAS9.mp4',
        u'md5': u'72954ea10bc979ab5e2eb288b21425a0',
        u'info_dict': {
            u"description": u"One Piece 606",
            u"title": u"One Piece 606"
        }
    }

    def _pass_challenge(self, url, video_id, challenge_page):
        """Answer the challenge form found in *challenge_page*.

        url            -- the originally requested URL; its scheme and host
                          are reused to build the confirmation request.
        video_id       -- id passed through to ``_download_webpage``.
        challenge_page -- decoded body of the HTTP 503 response.

        Raises ExtractorError when the arithmetic task cannot be located in
        the page (the two ``_search_regex`` lookups are called with their
        default failure handling).
        """
        action = self._search_regex(
            r'<form id="challenge-form" action="([^"]+)"',
            challenge_page, u'Redirect form')
        vc = self._search_regex(
            r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',
            challenge_page, u'redirect vc value')
        task = re.search(
            r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
            challenge_page)
        if task is None:
            raise ExtractorError(u'Cannot find redirect math task')

        # The page states the task as "x+y*z"; multiplication binds tighter
        # than addition, exactly as in the Python expression below.
        first, second, third = (int(num) for num in task.groups())
        task_result = first + second * third

        parsed_url = compat_urllib_parse_urlparse(url)
        # The submitted answer is the task result plus the length of the
        # host part of the requested URL.
        answer = task_result + len(parsed_url.netloc)
        query = compat_urllib_parse.urlencode({
            'jschl_vc': vc, 'jschl_answer': compat_str(answer)})
        confirm_url = (
            parsed_url.scheme + u'://' + parsed_url.netloc +
            action + '?' + query)
        # Only the request itself matters here; the response body is unused.
        self._download_webpage(
            confirm_url, video_id,
            note=u'Confirming after redirect')

    def _real_extract(self, url):
        """Return the info dict for the video at *url*.

        The result carries one entry in ``formats`` for each of the
        ``normal`` and ``hq`` variants whose file URL is present in the
        page, plus the title and description returned by the
        ``_og_search_*`` helpers.

        Raises ExtractorError when no format URL can be found, and
        re-raises any download error that is not an HTTP 503.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id')

        # Keep the try body down to the single call whose failure is handled.
        try:
            webpage = self._download_webpage(url, video_id)
        except ExtractorError as ee:
            if not isinstance(ee.cause, compat_HTTPError) or \
               ee.cause.code != 503:
                raise

            # The body of the 503 response carries the challenge form.
            redir_webpage = ee.cause.read().decode('utf-8')
            self._pass_challenge(url, video_id, redir_webpage)
            webpage = self._download_webpage(url, video_id)

        formats = []
        for format_id in ('normal', 'hq'):
            rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)
            # Non-fatal lookup: a missing variant is simply skipped.
            video_url = self._search_regex(rex, webpage, u'video file URL',
                                           fatal=False)
            if not video_url:
                continue
            formats.append({
                'format_id': format_id,
                'url': video_url,
            })
        if not formats:
            raise ExtractorError(u'Cannot find any video format!')

        video_title = self._og_search_title(webpage)
        video_description = self._og_search_description(webpage)

        return {
            '_type': 'video',
            'id': video_id,
            'formats': formats,
            'title': video_title,
            'description': video_description
        }