]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/iwara.py
[cleanup] Add more ruff rules (#10149)
[yt-dlp.git] / yt_dlp / extractor / iwara.py
CommitLineData
c14af7a7 1import functools
c14af7a7 2import hashlib
21b9413c 3import json
0a5d7c39 4import time
e897bd82
SS
5import urllib.error
6import urllib.parse
001a5fd3
YCH
7
8from .common import InfoExtractor
caf0f5f8 9from ..utils import (
95a383be 10 ExtractorError,
c14af7a7 11 OnDemandPagedList,
caf0f5f8 12 int_or_none,
0a5d7c39 13 jwt_decode_hs256,
caf0f5f8 14 mimetype2ext,
56793f74 15 qualities,
c14af7a7 16 traverse_obj,
0a5d7c39 17 try_call,
c14af7a7 18 unified_timestamp,
caf0f5f8 19)
001a5fd3
YCH
20
21
class IwaraBaseIE(InfoExtractor):
    """Shared login and token handling for the Iwara extractors.

    Both tokens are stored on ``IwaraBaseIE`` itself rather than on the
    instance, so every extractor deriving from this class reuses them.
    """

    _NETRC_MACHINE = 'iwara'
    _USERTOKEN = None  # 'token' value returned by the /user/login endpoint
    _MEDIATOKEN = None  # 'accessToken' value returned by the /user/token endpoint

    def _is_token_expired(self, token, token_type):
        """Report whether ``token`` is past its ``exp`` claim.

        ``token_type`` is only used for the message printed to the screen.
        If the ``exp`` claim cannot be obtained (``try_call`` yields a falsy
        value), 0 is used instead, so such a token counts as expired.

        Returns True if expired, False otherwise.
        """
        # User token TTL == ~3 weeks, Media token TTL == ~1 hour
        expires_at = try_call(lambda: jwt_decode_hs256(token)['exp']) or 0
        # NOTE(review): the 120 second offset is subtracted from "now", so a
        # token is only reported expired two minutes after `exp` - confirm
        # that this is meant as tolerance and not as an early-refresh margin.
        if expires_at <= int(time.time() - 120):
            self.to_screen(f'{token_type} token has expired')
            return True
        return False

    def _get_user_token(self):
        """Make sure ``IwaraBaseIE._USERTOKEN`` holds a usable user token.

        Does nothing when no username/password pair is available. Otherwise
        the token is taken from the class attribute or the cache, and a new
        login is performed only when that token is missing or expired.

        Raises ExtractorError when the login response carries no token.
        """
        username, password = self._get_login_info()
        if not username or not password:
            return

        user_token = IwaraBaseIE._USERTOKEN or self.cache.load(self._NETRC_MACHINE, username)
        if not user_token or self._is_token_expired(user_token, 'User'):
            # Every HTTP status is accepted so that the JSON error body can
            # be inspected below instead of failing inside the download
            response = self._download_json(
                'https://api.iwara.tv/user/login', None, note='Logging in',
                headers={'Content-Type': 'application/json'}, data=json.dumps({
                    'email': username,
                    'password': password,
                }).encode(), expected_status=lambda x: True)
            user_token = traverse_obj(response, ('token', {str}))
            if not user_token:
                # The response may lack a string 'message'; fall back to ''
                # so the membership test below cannot raise TypeError
                error = traverse_obj(response, ('message', {str})) or ''
                if 'invalidLogin' in error:
                    raise ExtractorError('Invalid login credentials', expected=True)
                raise ExtractorError(f'Iwara API said: {error or "nothing"}')

            self.cache.store(self._NETRC_MACHINE, username, user_token)

        IwaraBaseIE._USERTOKEN = user_token

    def _get_media_token(self):
        """Return the ``Authorization`` header dict carrying the media token.

        Returns None when no user token is available (no credentials were
        passed). The media token is requested again whenever it is missing
        or expired.
        """
        self._get_user_token()
        if not IwaraBaseIE._USERTOKEN:
            return  # user has not passed credentials

        if not IwaraBaseIE._MEDIATOKEN or self._is_token_expired(IwaraBaseIE._MEDIATOKEN, 'Media'):
            IwaraBaseIE._MEDIATOKEN = self._download_json(
                'https://api.iwara.tv/user/token', None, note='Fetching media token',
                data=b'', headers={
                    'Authorization': f'Bearer {IwaraBaseIE._USERTOKEN}',
                    'Content-Type': 'application/json',
                })['accessToken']

        return {'Authorization': f'Bearer {IwaraBaseIE._MEDIATOKEN}'}

    def _perform_login(self, username, password):
        """Log in by fetching the tokens; the arguments themselves are unused
        because ``_get_user_token`` reads the credentials on its own."""
        self._get_media_token()
21b9413c 75
76
class IwaraIE(IwaraBaseIE):
    """Extractor for single Iwara video pages."""

    IE_NAME = 'iwara'
    _VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos?/(?P<id>[a-zA-Z0-9]+)'
    _TESTS = [{
        'url': 'https://www.iwara.tv/video/k2ayoueezfkx6gvq',
        'info_dict': {
            'id': 'k2ayoueezfkx6gvq',
            'ext': 'mp4',
            'age_limit': 18,
            'title': 'Defeat of Irybelda - アイリベルダの敗北',
            'description': 'md5:70278abebe706647a8b4cb04cf23e0d3',
            'uploader': 'Inwerwm',
            'uploader_id': 'inwerwm',
            'tags': 'count:1',
            'like_count': 6133,
            'view_count': 1050343,
            'comment_count': 1,
            'timestamp': 1677843869,
            'modified_timestamp': 1679056362,
        },
        'skip': 'this video cannot be played because of migration',
    }, {
        'url': 'https://iwara.tv/video/1ywe1sbkqwumpdxz5/',
        'md5': '7645f966f069b8ec9210efd9130c9aad',
        'info_dict': {
            'id': '1ywe1sbkqwumpdxz5',
            'ext': 'mp4',
            'age_limit': 18,
            'title': 'Aponia アポニア SEX Party Tonight 手の脱衣 巨乳 ',
            'description': 'md5:3f60016fff22060eef1ef26d430b1f67',
            'uploader': 'Lyu ya',
            'uploader_id': 'user792540',
            'tags': [
                'uncategorized',
            ],
            'like_count': int,
            'view_count': int,
            'comment_count': int,
            'timestamp': 1678732213,
            'modified_timestamp': int,
            'thumbnail': 'https://files.iwara.tv/image/thumbnail/581d12b5-46f4-4f15-beb2-cfe2cde5d13d/thumbnail-00.jpg',
            'modified_date': '20230614',
            'upload_date': '20230313',
        },
    }, {
        'url': 'https://iwara.tv/video/blggmfno8ghl725bg',
        'info_dict': {
            'id': 'blggmfno8ghl725bg',
            'ext': 'mp4',
            'age_limit': 18,
            'title': 'お外でおしっこしちゃう猫耳ロリメイド',
            'description': 'md5:0342ba9bf6db09edbbb28729657c3611',
            'uploader': 'Fe_Kurosabi',
            'uploader_id': 'fekurosabi',
            'tags': [
                'pee',
            ],
            'like_count': int,
            'view_count': int,
            'comment_count': int,
            'timestamp': 1598880567,
            'modified_timestamp': int,
            'upload_date': '20200831',
            'modified_date': '20230605',
            'thumbnail': 'https://files.iwara.tv/image/thumbnail/7693e881-d302-42a4-a780-f16d66b5dadd/thumbnail-00.jpg',
            # 'availability': 'needs_auth',
        },
    }]

    def _extract_formats(self, video_id, fileurl):
        """Yield one format dict per entry of the JSON list served at ``fileurl``.

        The request carries an ``X-Version`` header: the SHA-1 hex digest of
        the last path segment of ``fileurl``, its ``expires`` query value and
        a fixed string, joined with underscores.
        """
        parsed_url = urllib.parse.urlparse(fileurl)
        query = urllib.parse.parse_qs(parsed_url.query)
        last_segment = parsed_url.path.rstrip('/').split('/')[-1]
        # https://github.com/yt-dlp/yt-dlp/issues/6549#issuecomment-1473771047
        signature_input = '_'.join((last_segment, query['expires'][0], '5nFp9kmbNnHdAFhaqMvt'))
        x_version = hashlib.sha1(signature_input.encode()).hexdigest()

        preference = qualities(['preview', '360', '540', 'Source'])
        # The same template is applied to every entry of the file list
        format_template = {
            'format_id': 'name',
            'url': ('src', ('view', 'download'), {self._proto_relative_url}),
            'ext': ('type', {mimetype2ext}),
            'quality': ('name', {preference}),
            'height': ('name', {int_or_none}),
        }

        file_list = self._download_json(fileurl, video_id, headers={'X-Version': x_version})
        for file_entry in file_list:
            yield traverse_obj(file_entry, format_template, get_all=False)

    def _real_extract(self, url):
        """Fetch the video's API record and turn it into an info dict.

        API error messages are mapped to login-required or generic extractor
        errors. A record without ``fileUrl`` is delegated to its ``embedUrl``
        when one exists, and is reported as unplayable otherwise.
        """
        video_id = self._match_id(url)
        username, _ = self._get_login_info()
        # Any HTTP status is accepted so the API's own 'message' can be read
        video_data = self._download_json(
            f'https://api.iwara.tv/video/{video_id}', video_id,
            expected_status=lambda x: True, headers=self._get_media_token())

        # The uploader's info is already available in the response here,
        # even for error responses, but it is not used
        errmsg = video_data.get('message')
        if errmsg == 'errors.privateVideo':
            self.raise_login_required('Private video. Login if you have permissions to watch', method='password')
        elif errmsg == 'errors.notFound' and not username:
            self.raise_login_required('Video may need login to view', method='password')
        elif errmsg:  # None if success
            raise ExtractorError(f'Iwara says: {errmsg}')

        file_url = video_data.get('fileUrl')
        if not file_url:
            embed_url = video_data.get('embedUrl')
            if embed_url:
                return self.url_result(embed_url)
            raise ExtractorError('This video is unplayable', expected=True)

        metadata = traverse_obj(video_data, {
            'title': 'title',
            'description': 'body',
            'uploader': ('user', 'name'),
            'uploader_id': ('user', 'username'),
            'tags': ('tags', ..., 'id'),
            'like_count': 'numLikes',
            'view_count': 'numViews',
            'comment_count': 'numComments',
            'timestamp': ('createdAt', {unified_timestamp}),
            'modified_timestamp': ('updatedAt', {unified_timestamp}),
            'thumbnail': ('file', 'id', {str}, {
                lambda file_id: f'https://files.iwara.tv/image/thumbnail/{file_id}/thumbnail-00.jpg'}),
        })

        return {
            'id': video_id,
            # ecchi is 'sexy' in Japanese
            'age_limit': 18 if video_data.get('rating') == 'ecchi' else 0,
            **metadata,
            'formats': list(self._extract_formats(video_id, file_url)),
        }
ff4d7860 204
205
class IwaraUserIE(IwaraBaseIE):
    """Extractor that lists the videos of an Iwara profile as a playlist."""

    _VALID_URL = r'https?://(?:www\.)?iwara\.tv/profile/(?P<id>[^/?#&]+)'
    IE_NAME = 'iwara:user'
    _PER_PAGE = 32

    _TESTS = [{
        'url': 'https://iwara.tv/profile/user792540/videos',
        'info_dict': {
            'id': 'user792540',
            'title': 'Lyu ya',
        },
        'playlist_mincount': 70,
    }, {
        'url': 'https://iwara.tv/profile/theblackbirdcalls/videos',
        'info_dict': {
            'id': 'theblackbirdcalls',
            'title': 'TheBlackbirdCalls',
        },
        'playlist_mincount': 723,
    }, {
        'url': 'https://iwara.tv/profile/user792540',
        'only_matching': True,
    }, {
        'url': 'https://iwara.tv/profile/theblackbirdcalls',
        'only_matching': True,
    }, {
        'url': 'https://www.iwara.tv/profile/lumymmd',
        'info_dict': {
            'id': 'lumymmd',
            'title': 'Lumy MMD',
        },
        'playlist_mincount': 1,
    }]

    def _entries(self, playlist_id, user_id, page):
        """Yield a URL result for each video ID on one page of the user's videos.

        ``playlist_id`` only labels the download; the API request itself is
        filtered by ``user_id`` and sorted by date.
        """
        request_query = {
            'page': page,
            'sort': 'date',
            'user': user_id,
            'limit': self._PER_PAGE,
        }
        page_data = self._download_json(
            'https://api.iwara.tv/videos', playlist_id,
            note=f'Downloading page {page}',
            query=request_query, headers=self._get_media_token())
        yield from (
            self.url_result(f'https://iwara.tv/video/{video_id}')
            for video_id in traverse_obj(page_data, ('results', ..., 'id')))

    def _real_extract(self, url):
        """Look up the profile and return a lazily paged playlist of its videos."""
        playlist_id = self._match_id(url)
        user_info = self._download_json(
            f'https://api.iwara.tv/profile/{playlist_id}', playlist_id,
            note='Requesting user info')
        user_id = traverse_obj(user_info, ('user', 'id'))

        # The page number is supplied by OnDemandPagedList as the last argument
        fetch_page = functools.partial(self._entries, playlist_id, user_id)
        paged_entries = OnDemandPagedList(fetch_page, self._PER_PAGE)
        playlist_title = traverse_obj(user_info, ('user', 'name'))
        return self.playlist_result(paged_entries, playlist_id, playlist_title)
ff4d7860 265
266
class IwaraPlaylistIE(IwaraBaseIE):
    """Extractor for Iwara playlists."""

    _VALID_URL = r'https?://(?:www\.)?iwara\.tv/playlist/(?P<id>[0-9a-f-]+)'
    IE_NAME = 'iwara:playlist'
    _PER_PAGE = 32

    _TESTS = [{
        'url': 'https://iwara.tv/playlist/458e5486-36a4-4ac0-b233-7e9eef01025f',
        'info_dict': {
            'id': '458e5486-36a4-4ac0-b233-7e9eef01025f',
        },
        'playlist_mincount': 3,
    }]

    def _entries(self, playlist_id, first_page, page):
        """Yield a URL result for each video ID on one page of the playlist.

        Page 0 is served from ``first_page``, which ``_real_extract`` has
        already downloaded; every later page is requested from the API.
        """
        if page:
            # Later pages must come from the playlist endpoint as well. The
            # generic /videos endpoint was queried here before, without any
            # reference to the playlist, so its results could not be limited
            # to this playlist's videos.
            videos = self._download_json(
                f'https://api.iwara.tv/playlist/{playlist_id}', playlist_id,
                f'Downloading page {page}',
                query={'page': page, 'limit': self._PER_PAGE},
                headers=self._get_media_token())
        else:
            videos = first_page

        for x in traverse_obj(videos, ('results', ..., 'id')):
            yield self.url_result(f'https://iwara.tv/video/{x}')

    def _real_extract(self, url):
        """Download the first playlist page and return a lazily paged playlist."""
        playlist_id = self._match_id(url)
        page_0 = self._download_json(
            f'https://api.iwara.tv/playlist/{playlist_id}?page=0&limit={self._PER_PAGE}', playlist_id,
            note='Requesting playlist info', headers=self._get_media_token())

        return self.playlist_result(
            OnDemandPagedList(
                functools.partial(self._entries, playlist_id, page_0),
                self._PER_PAGE),
            playlist_id, traverse_obj(page_0, ('title', 'name')))