]>
Commit | Line | Data |
---|---|---|
2632941f | 1 | # encoding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
2632941f | 5 | |
2632941f | 6 | from .crunchyroll import CrunchyrollIE |
7 | ||
b2cf6543 S |
8 | from .common import InfoExtractor |
9 | from ..compat import compat_HTTPError | |
10 | from ..utils import ( | |
11 | ExtractorError, | |
12 | int_or_none, | |
13 | remove_start, | |
14 | xpath_text, | |
15 | ) | |
2632941f | 16 | |
b2cf6543 S |
17 | |
class SoompiBaseIE(InfoExtractor):
    """Shared helpers for the Soompi TV extractors."""

    def _get_episodes(self, webpage, episode_filter=None):
        """Return the episodes embedded in the page's ``VIDEOS = [...]`` JS array.

        When *episode_filter* is None, falsy entries are dropped (matching
        ``filter(None, ...)`` semantics); otherwise only episodes for which
        the predicate is truthy are kept.
        """
        raw_json = self._search_regex(
            r'VIDEOS\s*=\s*(\[.+?\]);', webpage, 'episodes JSON')
        all_episodes = self._parse_json(raw_json, None)
        predicate = episode_filter if episode_filter is not None else bool
        return [episode for episode in all_episodes if predicate(episode)]
25 | ||
26 | ||
class SoompiIE(SoompiBaseIE, CrunchyrollIE):
    """Extractor for single episode pages on tv.soompi.com.

    Inherits the subtitle decryption helpers (``_decrypt_subtitles``,
    ``_extract_subtitles``) from CrunchyrollIE — Soompi uses the same
    encrypted subtitle format.
    """
    IE_NAME = 'soompi'
    _VALID_URL = r'https?://tv\.soompi\.com/(?:en/)?watch/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://tv.soompi.com/en/watch/29235',
        'info_dict': {
            'id': '29235',
            'ext': 'mp4',
            'title': 'Episode 1096',
            'description': '2015-05-20'
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _get_episode(self, webpage, video_id):
        # First (and only) episode whose id matches the requested video id.
        # Raises IndexError if the episode is missing from the page.
        return self._get_episodes(webpage, lambda x: x['id'] == video_id)[0]

    def _get_subtitles(self, config, video_id):
        """Extract and decrypt subtitles from the quality config XML.

        Returns a dict mapping language code -> subtitle entries, in the
        format produced by CrunchyrollIE._extract_subtitles.
        """
        # Map subtitle ids to language codes, from the <subtitles> listing.
        sub_langs = {}
        for subtitle in config.findall('./{default}preload/subtitles/subtitle'):
            sub_langs[subtitle.attrib['id']] = subtitle.attrib['title']

        subtitles = {}
        for s in config.findall('./{default}preload/subtitle'):
            lang_code = sub_langs.get(s.attrib['id'])
            if not lang_code:
                continue
            sub_id = s.get('id')
            data = xpath_text(s, './data', 'data')
            iv = xpath_text(s, './iv', 'iv')
            # BUG FIX: the original tested the builtin `id` (always truthy)
            # instead of the local `sub_id`, so entries with a missing id
            # were never skipped and reached decryption with sub_id=None.
            if not sub_id or not iv or not data:
                continue
            subtitle = self._decrypt_subtitles(data, iv, sub_id).decode('utf-8')
            subtitles[lang_code] = self._extract_subtitles(subtitle)
        return subtitles

    def _real_extract(self, url):
        video_id = self._match_id(url)

        try:
            webpage = self._download_webpage(
                url, video_id, 'Downloading episode page')
        except ExtractorError as ee:
            # Region-blocked videos come back as HTTP 403 with an
            # explanatory message embedded in the error page body.
            if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
                webpage = ee.cause.read()
                block_message = self._html_search_regex(
                    r'(?s)<div class="block-message">(.+?)</div>', webpage,
                    'block message', default=None)
                if block_message:
                    raise ExtractorError(block_message, expected=True)
            raise

        formats = []
        config = None
        for format_id in re.findall(r'\?quality=([0-9a-zA-Z]+)', webpage):
            config = self._download_xml(
                'http://tv.soompi.com/en/show/_/%s-config.xml?mode=hls&quality=%s' % (video_id, format_id),
                video_id, 'Downloading %s XML' % format_id)
            m3u8_url = xpath_text(
                config, './{default}preload/stream_info/file',
                '%s m3u8 URL' % format_id)
            if not m3u8_url:
                continue
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', m3u8_id=format_id))
        self._sort_formats(formats)

        episode = self._get_episode(webpage, video_id)

        title = episode['name']
        description = episode.get('description')
        duration = int_or_none(episode.get('duration'))

        thumbnails = [{
            'id': thumbnail_id,
            'url': thumbnail_url,
        } for thumbnail_id, thumbnail_url in episode.get('img_url', {}).items()]

        # Subtitles are read from the config of the last quality downloaded
        # above; config stays None when no quality links were found.
        subtitles = self.extract_subtitles(config, video_id)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles
        }
118 | ||
119 | ||
class SoompiShowIE(SoompiBaseIE):
    """Playlist extractor for show pages on tv.soompi.com."""
    IE_NAME = 'soompi:show'
    _VALID_URL = r'https?://tv\.soompi\.com/en/shows/(?P<id>[0-9a-zA-Z\-_]+)'
    _TESTS = [{
        'url': 'http://tv.soompi.com/en/shows/liar-game',
        'info_dict': {
            'id': 'liar-game',
            'title': 'Liar Game',
            'description': 'md5:52c02bce0c1a622a95823591d0589b66',
        },
        'playlist_count': 14,
    }]

    def _real_extract(self, url):
        show_id = self._match_id(url)

        webpage = self._download_webpage(
            url, show_id, 'Downloading show page')

        # og:title looks like "SoompiTV | <show name>"; drop the site prefix.
        title = remove_start(self._og_search_title(webpage), 'SoompiTV | ')
        description = self._og_search_description(webpage)

        # One playlist entry per episode listed on the show page, each
        # delegated to the SoompiIE single-episode extractor.
        entries = []
        for episode in self._get_episodes(webpage):
            entries.append(self.url_result(
                'http://tv.soompi.com/en/watch/%s' % episode['id'], 'Soompi'))

        return self.playlist_result(entries, show_id, title, description)