]>
Commit | Line | Data |
---|---|---|
c14af7a7 | 1 | import functools |
ac668111 | 2 | import urllib.parse |
c14af7a7 | 3 | import hashlib |
21b9413c | 4 | import json |
001a5fd3 YCH |
5 | |
6 | from .common import InfoExtractor | |
caf0f5f8 | 7 | from ..utils import ( |
95a383be | 8 | ExtractorError, |
c14af7a7 | 9 | OnDemandPagedList, |
caf0f5f8 YCH |
10 | int_or_none, |
11 | mimetype2ext, | |
56793f74 | 12 | qualities, |
c14af7a7 L |
13 | traverse_obj, |
14 | unified_timestamp, | |
caf0f5f8 | 15 | ) |
001a5fd3 YCH |
16 | |
17 | ||
21b9413c | 18 | # https://github.com/yt-dlp/yt-dlp/issues/6671 |
class IwaraBaseIE(InfoExtractor):
    """Shared token handling for the iwara.tv extractors.

    Both tokens are kept as attributes of IwaraBaseIE itself, so they are
    shared by every subclass and instance.
    """

    _NETRC_MACHINE = 'iwara'
    _USERTOKEN = None
    _MEDIATOKEN = None

    def _get_user_token(self, invalidate=False):
        """Return the user token, logging in when no usable one is known.

        The token held on the class is returned as-is unless `invalidate`
        is true. Otherwise the token is looked up in self.cache under the
        configured username, and a login request is made when that lookup
        yields nothing or when `invalidate` is true. A token obtained by
        logging in is written back to self.cache.
        """
        if self._USERTOKEN and not invalidate:
            return self._USERTOKEN

        username, password = self._get_login_info()
        # Stays falsy when no username is configured
        IwaraBaseIE._USERTOKEN = username and self.cache.load(self._NETRC_MACHINE, username)
        if invalidate or not IwaraBaseIE._USERTOKEN:
            credentials = json.dumps({
                'email': username,
                'password': password,
            }).encode('utf-8')
            login_response = self._download_json(
                'https://api.iwara.tv/user/login', None, note='Logging in',
                data=credentials,
                headers={'Content-Type': 'application/json'})
            IwaraBaseIE._USERTOKEN = login_response['token']

            self.cache.store(self._NETRC_MACHINE, username, IwaraBaseIE._USERTOKEN)

        return self._USERTOKEN

    def _get_media_token(self, invalidate=False):
        """Return the media token, requesting a new one when needed.

        A new token is requested (authorized with the user token) when none
        is held on the class yet or when `invalidate` is true.
        """
        if invalidate or not self._MEDIATOKEN:
            token_response = self._download_json(
                'https://api.iwara.tv/user/token', None, note='Fetching media token',
                data=b'',  # Need to have some data here, even if it's empty
                headers={
                    'Authorization': f'Bearer {self._get_user_token()}',
                    'Content-Type': 'application/json',
                })
            IwaraBaseIE._MEDIATOKEN = token_response['accessToken']

        return self._MEDIATOKEN
58 | ||
59 | ||
60 | class IwaraIE(IwaraBaseIE): | |
c14af7a7 | 61 | IE_NAME = 'iwara' |
ab92d865 | 62 | _VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos?/(?P<id>[a-zA-Z0-9]+)' |
001a5fd3 | 63 | _TESTS = [{ |
c14af7a7 L |
64 | # this video cannot be played because of migration |
65 | 'only_matching': True, | |
66 | 'url': 'https://www.iwara.tv/video/k2ayoueezfkx6gvq', | |
001a5fd3 | 67 | 'info_dict': { |
c14af7a7 | 68 | 'id': 'k2ayoueezfkx6gvq', |
001a5fd3 | 69 | 'ext': 'mp4', |
001a5fd3 | 70 | 'age_limit': 18, |
c14af7a7 L |
71 | 'title': 'Defeat of Irybelda - アイリベルダの敗北', |
72 | 'description': 'md5:70278abebe706647a8b4cb04cf23e0d3', | |
73 | 'uploader': 'Inwerwm', | |
74 | 'uploader_id': 'inwerwm', | |
75 | 'tags': 'count:1', | |
76 | 'like_count': 6133, | |
77 | 'view_count': 1050343, | |
78 | 'comment_count': 1, | |
79 | 'timestamp': 1677843869, | |
80 | 'modified_timestamp': 1679056362, | |
001a5fd3 YCH |
81 | }, |
82 | }, { | |
c14af7a7 L |
83 | 'url': 'https://iwara.tv/video/1ywe1sbkqwumpdxz5/', |
84 | 'md5': '20691ce1473ec2766c0788e14c60ce66', | |
001a5fd3 | 85 | 'info_dict': { |
c14af7a7 | 86 | 'id': '1ywe1sbkqwumpdxz5', |
001a5fd3 | 87 | 'ext': 'mp4', |
001a5fd3 | 88 | 'age_limit': 18, |
c14af7a7 L |
89 | 'title': 'Aponia 阿波尼亚SEX Party Tonight 手动脱衣 大奶 裸腿', |
90 | 'description': 'md5:0c4c310f2e0592d68b9f771d348329ca', | |
91 | 'uploader': '龙也zZZ', | |
92 | 'uploader_id': 'user792540', | |
93 | 'tags': [ | |
94 | 'uncategorized' | |
95 | ], | |
96 | 'like_count': 1809, | |
97 | 'view_count': 25156, | |
98 | 'comment_count': 1, | |
99 | 'timestamp': 1678732213, | |
100 | 'modified_timestamp': 1679110271, | |
001a5fd3 | 101 | }, |
21b9413c | 102 | }, { |
103 | 'url': 'https://iwara.tv/video/blggmfno8ghl725bg', | |
104 | 'info_dict': { | |
105 | 'id': 'blggmfno8ghl725bg', | |
106 | 'ext': 'mp4', | |
107 | 'age_limit': 18, | |
108 | 'title': 'お外でおしっこしちゃう猫耳ロリメイド', | |
109 | 'description': 'md5:0342ba9bf6db09edbbb28729657c3611', | |
110 | 'uploader': 'Fe_Kurosabi', | |
111 | 'uploader_id': 'fekurosabi', | |
112 | 'tags': [ | |
113 | 'pee' | |
114 | ], | |
115 | 'like_count': 192, | |
116 | 'view_count': 12119, | |
117 | 'comment_count': 0, | |
118 | 'timestamp': 1598880567, | |
119 | 'modified_timestamp': 1598908995, | |
120 | 'availability': 'needs_auth', | |
121 | }, | |
001a5fd3 YCH |
122 | }] |
123 | ||
c14af7a7 L |
def _extract_formats(self, video_id, fileurl):
    """Yield one format dict for every entry of the file list at `fileurl`.

    The request carries an `X-Version` header: the SHA-1 hex digest of the
    last path segment of `fileurl`, its `expires` query value and a fixed
    key, joined with underscores.
    """
    parsed_url = urllib.parse.urlparse(fileurl)
    query_args = urllib.parse.parse_qs(parsed_url.query)
    last_segment = parsed_url.path.rstrip('/').split('/')[-1]
    # https://github.com/yt-dlp/yt-dlp/issues/6549#issuecomment-1473771047
    signature_input = '_'.join((last_segment, query_args['expires'][0], '5nFp9kmbNnHdAFhaqMvt'))
    x_version = hashlib.sha1(signature_input.encode()).hexdigest()

    preference = qualities(['preview', '360', '540', 'Source'])
    # The same field map is applied to every entry of the file list
    format_fields = {
        'format_id': 'name',
        'url': ('src', ('view', 'download'), {self._proto_relative_url}),
        'ext': ('type', {mimetype2ext}),
        'quality': ('name', {preference}),
        'height': ('name', {int_or_none}),
    }

    file_list = self._download_json(fileurl, video_id, headers={'X-Version': x_version})
    for file_entry in file_list:
        yield traverse_obj(file_entry, format_fields, get_all=False)
142 | ||
001a5fd3 YCH |
def _real_extract(self, url):
    """Extract metadata and formats for a single iwara.tv video URL.

    Raises a login-required error for private videos (and for "not found"
    responses when no username is configured), and ExtractorError for any
    other API error message or when the video has neither a file URL nor
    an embed URL. Videos that only have an embed URL are delegated via
    url_result.
    """
    video_id = self._match_id(url)
    username, password = self._get_login_info()
    # Only send an Authorization header when full credentials are configured
    headers = None
    if username and password:
        headers = {
            'Authorization': f'Bearer {self._get_media_token()}',
        }
    # Every HTTP status is accepted so that the error message in the body can be inspected
    video_data = self._download_json(
        f'https://api.iwara.tv/video/{video_id}', video_id,
        expected_status=lambda x: True, headers=headers)

    errmsg = video_data.get('message')  # None if success
    # at this point we can actually get uploaded user info, but do we need it?
    if errmsg == 'errors.privateVideo':
        self.raise_login_required('Private video. Login if you have permissions to watch')
    elif errmsg == 'errors.notFound' and not username:
        self.raise_login_required('Video may need login to view')
    elif errmsg:
        raise ExtractorError(f'Iwara says: {errmsg}')

    file_url = video_data.get('fileUrl')
    if not file_url:
        embed_url = video_data.get('embedUrl')
        if embed_url:
            return self.url_result(embed_url)
        raise ExtractorError('This video is unplayable', expected=True)

    metadata = traverse_obj(video_data, {
        'title': 'title',
        'description': 'body',
        'uploader': ('user', 'name'),
        'uploader_id': ('user', 'username'),
        'tags': ('tags', ..., 'id'),
        'like_count': 'numLikes',
        'view_count': 'numViews',
        'comment_count': 'numComments',
        'timestamp': ('createdAt', {unified_timestamp}),
        'modified_timestamp': ('updatedAt', {unified_timestamp}),
        'thumbnail': ('file', 'id', {str}, {
            lambda x: f'https://files.iwara.tv/image/thumbnail/{x}/thumbnail-00.jpg'}),
    })

    return {
        'id': video_id,
        'age_limit': 18 if video_data.get('rating') == 'ecchi' else 0,  # ecchi is 'sexy' in Japanese
        **metadata,
        'formats': list(self._extract_formats(video_id, file_url)),
    }
ff4d7860 | 183 | |
def _perform_login(self, username, password):
    """Make sure a user token and a media token are available.

    When a user token is cached for `username` and a media token can be
    obtained with it, no login request is made. If obtaining the media
    token fails (for example because the cached user token is no longer
    accepted), the failure is logged at debug level and a fresh login is
    performed instead of aborting.
    """
    if self.cache.load(self._NETRC_MACHINE, username):
        try:
            if self._get_media_token():
                self.write_debug('Skipping logging in')
                return
        except ExtractorError as error:
            # Do not let a stale cached token block the login; fall through and re-authenticate
            self.write_debug(f'Could not reuse the cached user token, logging in again: {error}')

    # _get_user_token(True) updates IwaraBaseIE._USERTOKEN and stores the new
    # token in the cache itself, so neither needs to be repeated here
    self._get_user_token(True)
    self._get_media_token(True)
192 | ||
ff4d7860 | 193 | |
class IwaraUserIE(IwaraBaseIE):
    """Extractor for the list of videos uploaded by an iwara.tv profile."""

    IE_NAME = 'iwara:user'
    _VALID_URL = r'https?://(?:www\.)?iwara\.tv/profile/(?P<id>[^/?#&]+)'
    _PER_PAGE = 32

    _TESTS = [{
        'url': 'https://iwara.tv/profile/user792540/videos',
        'info_dict': {
            'id': 'user792540',
        },
        'playlist_mincount': 80,
    }, {
        'url': 'https://iwara.tv/profile/theblackbirdcalls/videos',
        'info_dict': {
            'id': 'theblackbirdcalls',
        },
        'playlist_mincount': 723,
    }, {
        'url': 'https://iwara.tv/profile/user792540',
        'only_matching': True,
    }, {
        'url': 'https://iwara.tv/profile/theblackbirdcalls',
        'only_matching': True,
    }]

    def _entries(self, playlist_id, user_id, page):
        """Yield a url_result for each video id on page `page` of the user's uploads."""
        listing_query = {
            'page': page,
            'sort': 'date',
            'user': user_id,
            'limit': self._PER_PAGE,
        }
        listing = self._download_json(
            'https://api.iwara.tv/videos', playlist_id,
            note=f'Downloading page {page}', query=listing_query)
        yield from (
            self.url_result(f'https://iwara.tv/video/{entry_id}')
            for entry_id in traverse_obj(listing, ('results', ..., 'id')))

    def _real_extract(self, url):
        """Return a lazily paged playlist of the profile's videos.

        The profile name from the URL is used as the playlist id and the
        user's display name as the playlist title.
        """
        profile_name = self._match_id(url)
        profile = self._download_json(
            f'https://api.iwara.tv/profile/{profile_name}', profile_name,
            note='Requesting user info')

        # The video listing is queried by the user's id taken from the profile response
        fetch_page = functools.partial(
            self._entries, profile_name, traverse_obj(profile, ('user', 'id')))
        paged_entries = OnDemandPagedList(fetch_page, self._PER_PAGE)

        return self.playlist_result(
            paged_entries, profile_name, traverse_obj(profile, ('user', 'name')))
ff4d7860 | 244 | |
245 | ||
class IwaraPlaylistIE(IwaraBaseIE):
    """Extractor for iwara.tv playlists."""

    # the ID is an UUID but I don't think it's necessary to write concrete regex
    _VALID_URL = r'https?://(?:www\.)?iwara\.tv/playlist/(?P<id>[0-9a-f-]+)'
    IE_NAME = 'iwara:playlist'
    _PER_PAGE = 32

    _TESTS = [{
        'url': 'https://iwara.tv/playlist/458e5486-36a4-4ac0-b233-7e9eef01025f',
        'info_dict': {
            'id': '458e5486-36a4-4ac0-b233-7e9eef01025f',
        },
        'playlist_mincount': 3,
    }]

    def _entries(self, playlist_id, first_page, page):
        """Yield a url_result for each video id on page `page` of the playlist.

        Page 0 is served from `first_page`, the response already downloaded
        by _real_extract. Later pages are requested from the same playlist
        endpoint, so that only videos belonging to this playlist are
        returned.
        """
        if page:
            # NOTE(review): assumes the playlist endpoint honours `page` > 0 the
            # same way it does for the page-0 request in _real_extract
            videos = self._download_json(
                f'https://api.iwara.tv/playlist/{playlist_id}', playlist_id,
                f'Downloading page {page}',
                query={'page': page, 'limit': self._PER_PAGE})
        else:
            videos = first_page
        for x in traverse_obj(videos, ('results', ..., 'id')):
            yield self.url_result(f'https://iwara.tv/video/{x}')

    def _real_extract(self, url):
        """Return a lazily paged playlist for the playlist URL.

        The first page is downloaded up front; it is reused for the entries
        of page 0 and for the playlist title.
        """
        playlist_id = self._match_id(url)
        page_0 = self._download_json(
            f'https://api.iwara.tv/playlist/{playlist_id}?page=0&limit={self._PER_PAGE}', playlist_id,
            note='Requesting playlist info')

        return self.playlist_result(
            OnDemandPagedList(
                functools.partial(self._entries, playlist_id, page_0),
                self._PER_PAGE),
            playlist_id, traverse_obj(page_0, ('title', 'name')))