]>
Commit | Line | Data |
---|---|---|
56005066 S |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
29f7c58a | 4 | import functools |
56005066 S |
5 | import re |
6 | ||
7 | from .common import InfoExtractor | |
8 | from .kaltura import KalturaIE | |
29f7c58a | 9 | from ..utils import ( |
10 | extract_attributes, | |
11 | int_or_none, | |
12 | OnDemandPagedList, | |
13 | parse_age_limit, | |
14 | strip_or_none, | |
15 | try_get, | |
16 | ) | |
17 | ||
18 | ||
class AsianCrushBaseIE(InfoExtractor):
    """Shared API and parsing helpers for the AsianCrush family of sites."""

    # Common URL prefix; captures the bare host (without "www.") as ``host``.
    _VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|(?:cocoro|retrocrush)\.tv))'
    # Video object fields searched, in this order, for a URL that embeds the
    # Kaltura partner id and entry id.
    _KALTURA_KEYS = [
        'video_url', 'progressive_url', 'download_url', 'thumbnail_url',
        'widescreen_thumbnail_url', 'screencap_widescreen',
    ]
    # Optional suffix inserted after "api" in the API hostname, keyed by host.
    _API_SUFFIX = {'retrocrush.tv': '-ott'}

    def _call_api(self, host, endpoint, video_id, query, resource):
        """Download ``https://api<suffix>.<host>/<endpoint>`` as JSON.

        Returns the value stored under the ``objects`` key of the response.
        ``resource`` is only used to build the progress note.
        """
        api_url = 'https://api%s.%s/%s' % (
            self._API_SUFFIX.get(host, ''), host, endpoint)
        return self._download_json(
            api_url, video_id,
            'Downloading %s JSON metadata' % resource, query=query,
            headers=self.geo_verification_headers())['objects']

    def _download_object_data(self, host, object_id, resource):
        """Return the first object of a ``search`` API call for ``object_id``."""
        return self._call_api(
            host, 'search', object_id, {'id': object_id}, resource)[0]

    def _get_object_description(self, obj):
        """Return the stripped long description, else the short one."""
        return strip_or_none(obj.get('long_description') or obj.get('short_description'))

    def _parse_video_data(self, video):
        """Build a ``url_transparent`` result delegating to the Kaltura extractor.

        ``video`` is a video object dict; its ``name`` key is mandatory
        (``KeyError`` otherwise), every other field is optional.
        """
        title = video['name']

        # The first field whose URL contains both ids wins.
        # NOTE(review): when no field matches, both ids stay None and the
        # resulting URL is 'kaltura:None:None' - confirm whether this should
        # be reported explicitly instead.
        entry_id, partner_id = [None] * 2
        for k in self._KALTURA_KEYS:
            k_url = video.get(k)
            if not k_url:
                continue
            mobj = re.search(r'/p/(\d+)/.+?/entryId/([^/]+)/', k_url)
            if mobj:
                partner_id, entry_id = mobj.groups()
                break

        meta_categories = try_get(video, lambda x: x['meta']['categories'], list) or []
        # Skip entries that are not dicts (e.g. null or plain strings) rather
        # than failing on ``.get``; entries with a missing/empty name are
        # dropped as well.
        categories = [
            c['name'] for c in meta_categories
            if isinstance(c, dict) and c.get('name')]

        show_info = video.get('show_info') or {}

        return {
            '_type': 'url_transparent',
            'url': 'kaltura:%s:%s' % (partner_id, entry_id),
            'ie_key': KalturaIE.ie_key(),
            'id': entry_id,
            'title': title,
            'description': self._get_object_description(video),
            'age_limit': parse_age_limit(video.get('mpaa_rating') or video.get('tv_rating')),
            'categories': categories,
            'series': show_info.get('show_name'),
            'season_number': int_or_none(show_info.get('season_num')),
            'season_id': show_info.get('season_id'),
            'episode_number': int_or_none(show_info.get('episode_num')),
        }
56005066 S |
71 | |
72 | ||
class AsianCrushIE(AsianCrushBaseIE):
    """Extractor for single video pages (``/video/...<digits>v``)."""

    _VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % AsianCrushBaseIE._VALID_URL_BASE
    _TESTS = [{
        'url': 'https://www.asiancrush.com/video/004289v/women-who-flirt',
        'md5': 'c3b740e48d0ba002a42c0b72857beae6',
        'info_dict': {
            'id': '1_y4tmjm5r',
            'ext': 'mp4',
            'title': 'Women Who Flirt',
            'description': 'md5:b65c7e0ae03a85585476a62a186f924c',
            'timestamp': 1496936429,
            'upload_date': '20170608',
            'uploader_id': 'craig@crifkin.com',
            'age_limit': 13,
            'categories': 'count:5',
            'duration': 5812,
        },
    }, {
        'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
        'only_matching': True,
    }, {
        'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/',
        'only_matching': True,
    }, {
        'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/',
        'only_matching': True,
    }, {
        'url': 'https://www.midnightpulp.com/video/010400v/drifters/',
        'only_matching': True,
    }, {
        'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/',
        'only_matching': True,
    }, {
        'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/',
        'only_matching': True,
    }, {
        'url': 'https://www.retrocrush.tv/video/true-tears/012328v-i...gave-away-my-tears',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a video page URL into a Kaltura-backed result."""
        url_match = re.match(self._VALID_URL, url)
        host = url_match.group('host')
        video_id = url_match.group('id')

        if host == 'cocoro.tv':
            # On cocoro.tv the id to look up is taken from the page's
            # iEmbedVars object when present; otherwise the id from the URL
            # is kept.
            webpage = self._download_webpage(url, video_id)
            embed_vars_json = self._search_regex(
                r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars',
                default='{}')
            embed_vars = self._parse_json(
                embed_vars_json, video_id, fatal=False) or {}
            video_id = embed_vars.get('entry_id') or video_id

        video_object = self._download_object_data(host, video_id, 'video')
        return self._parse_video_data(video_object)
0f2aa0dc | 125 | |
0f2aa0dc | 126 | |
class AsianCrushPlaylistIE(AsianCrushBaseIE):
    """Extractor for series pages (``/series/...<digits>s``)."""

    _VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushBaseIE._VALID_URL_BASE
    _TESTS = [{
        'url': 'https://www.asiancrush.com/series/006447s/fruity-samurai',
        'info_dict': {
            'id': '6447',
            'title': 'Fruity Samurai',
            'description': 'md5:7535174487e4a202d3872a7fc8f2f154',
        },
        'playlist_count': 13,
    }, {
        'url': 'https://www.yuyutv.com/series/013920s/peep-show/',
        'only_matching': True,
    }, {
        'url': 'https://www.midnightpulp.com/series/016375s/mononoke/',
        'only_matching': True,
    }, {
        'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/',
        'only_matching': True,
    }, {
        'url': 'https://www.retrocrush.tv/series/012355s/true-tears',
        'only_matching': True,
    }]
    # Sent as the API's ``max`` parameter and used as the paged-list page size.
    _PAGE_SIZE = 1000000000

    def _fetch_page(self, domain, parent_id, page):
        """Yield parsed video results for one zero-based ``page`` of a series."""
        videos = self._call_api(
            domain, 'getreferencedobjects', parent_id, {
                'max': self._PAGE_SIZE,
                'object_type': 'video',
                'parent_id': parent_id,
                'start': page * self._PAGE_SIZE,
            }, 'page %d' % (page + 1))
        for video in videos:
            yield self._parse_video_data(video)

    def _real_extract(self, url):
        """Return a playlist result for a series URL.

        cocoro.tv series are scraped from the HTML page; every other host is
        served through the JSON API with lazily fetched pages.
        """
        host, playlist_id = re.match(self._VALID_URL, url).groups()

        if host == 'cocoro.tv':
            webpage = self._download_webpage(url, playlist_id)

            # Only anchors whose class attribute is exactly "clearfix" and
            # whose href is a video URL are turned into entries.
            entries = [
                self.url_result(mobj.group('url'), ie=AsianCrushIE.ie_key())
                for mobj in re.finditer(
                    r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL,
                    webpage)
                if extract_attributes(mobj.group(0)).get('class') == 'clearfix']

            # ``[^>]*`` before ``id=`` lets the heading carry other attributes
            # ahead of the id; a single ``[^>]`` would only match when ``id``
            # is the very first attribute.
            title = self._html_search_regex(
                r'(?s)<h1\b[^>]*\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
                'title', default=None)
            if not title:
                title = self._og_search_title(webpage, default=None)
            if not title:
                title = self._html_search_meta(
                    'twitter:title', webpage, 'title', default=None)
            if not title:
                title = self._search_regex(
                    r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
            if title:
                # Drop everything from the first " | " separator onwards.
                title = re.sub(r'\s*\|\s*.+?$', '', title)

            description = self._og_search_description(
                webpage, default=None) or self._html_search_meta(
                'twitter:description', webpage, 'description', fatal=False)
        else:
            show = self._download_object_data(host, playlist_id, 'show')
            title = show.get('name')
            description = self._get_object_description(show)
            entries = OnDemandPagedList(
                functools.partial(self._fetch_page, host, playlist_id),
                self._PAGE_SIZE)

        return self.playlist_result(entries, playlist_id, title, description)