]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/iwara.py
[misc] Add `hatch`, `ruff`, `pre-commit` and improve dev docs (#7409)
[yt-dlp.git] / yt_dlp / extractor / iwara.py
1 import functools
2 import hashlib
3 import json
4 import time
5 import urllib.error
6 import urllib.parse
7
8 from .common import InfoExtractor
9 from ..utils import (
10 ExtractorError,
11 OnDemandPagedList,
12 int_or_none,
13 jwt_decode_hs256,
14 mimetype2ext,
15 qualities,
16 traverse_obj,
17 try_call,
18 unified_timestamp,
19 )
20
21
class IwaraBaseIE(InfoExtractor):
    """Shared login and token handling for the Iwara extractors.

    Both tokens are stored on ``IwaraBaseIE`` itself rather than on the
    instance, so every extractor deriving from this class reads and writes
    the same values.
    """

    _NETRC_MACHINE = 'iwara'
    _USERTOKEN = None
    _MEDIATOKEN = None

    def _is_token_expired(self, token, token_type):
        """Return True (after printing a notice) if ``token`` counts as expired.

        The ``exp`` field is read from the decoded JWT; when it cannot be
        read, 0 is used instead, so such a token always counts as expired.
        ``token_type`` is only used in the printed message.
        """
        # User token TTL == ~3 weeks, Media token TTL == ~1 hour
        # NOTE(review): the comparison is against "now - 120", i.e. a token is
        # still accepted for 120 seconds after its `exp` - confirm this
        # direction of the margin is intended.
        if (try_call(lambda: jwt_decode_hs256(token)['exp']) or 0) <= int(time.time() - 120):
            self.to_screen(f'{token_type} token has expired')
            return True
        return False

    def _get_user_token(self):
        """Populate ``IwaraBaseIE._USERTOKEN``, logging in when necessary.

        Does nothing when no username/password pair is available. A token
        held on the class or in the cache is reused unless it is expired;
        otherwise a login request is sent and the fresh token is cached.

        Raises:
            ExtractorError: if the login response carries no token.
        """
        username, password = self._get_login_info()
        if not username or not password:
            return

        user_token = IwaraBaseIE._USERTOKEN or self.cache.load(self._NETRC_MACHINE, username)
        if not user_token or self._is_token_expired(user_token, 'User'):
            # Every HTTP status is accepted so that the JSON error body can be
            # inspected below instead of failing on the status code.
            response = self._download_json(
                'https://api.iwara.tv/user/login', None, note='Logging in',
                headers={'Content-Type': 'application/json'}, data=json.dumps({
                    'email': username,
                    'password': password,
                }).encode(), expected_status=lambda x: True)
            user_token = traverse_obj(response, ('token', {str}))
            if not user_token:
                error = traverse_obj(response, ('message', {str}))
                # `error` is None when the response has no string `message`;
                # guard the membership test so that case reaches the generic
                # error below instead of raising TypeError.
                if error and 'invalidLogin' in error:
                    raise ExtractorError('Invalid login credentials', expected=True)
                raise ExtractorError(f'Iwara API said: {error or "nothing"}')

            self.cache.store(self._NETRC_MACHINE, username, user_token)

        IwaraBaseIE._USERTOKEN = user_token

    def _get_media_token(self):
        """Return an ``Authorization`` header dict built from the media token.

        Returns None when no user token could be obtained. The media token is
        requested again whenever none is stored or the stored one is expired.
        """
        self._get_user_token()
        if not IwaraBaseIE._USERTOKEN:
            return  # user has not passed credentials

        if not IwaraBaseIE._MEDIATOKEN or self._is_token_expired(IwaraBaseIE._MEDIATOKEN, 'Media'):
            IwaraBaseIE._MEDIATOKEN = self._download_json(
                'https://api.iwara.tv/user/token', None, note='Fetching media token',
                data=b'', headers={
                    'Authorization': f'Bearer {IwaraBaseIE._USERTOKEN}',
                    'Content-Type': 'application/json',
                })['accessToken']

        return {'Authorization': f'Bearer {IwaraBaseIE._MEDIATOKEN}'}

    def _perform_login(self, username, password):
        """Obtain the tokens up front; the arguments themselves are not used here."""
        self._get_media_token()
75
76
class IwaraIE(IwaraBaseIE):
    """Extractor for a single iwara.tv video URL."""

    IE_NAME = 'iwara'
    _VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos?/(?P<id>[a-zA-Z0-9]+)'
    _TESTS = [{
        'url': 'https://www.iwara.tv/video/k2ayoueezfkx6gvq',
        'info_dict': {
            'id': 'k2ayoueezfkx6gvq',
            'ext': 'mp4',
            'age_limit': 18,
            'title': 'Defeat of Irybelda - アイリベルダの敗北',
            'description': 'md5:70278abebe706647a8b4cb04cf23e0d3',
            'uploader': 'Inwerwm',
            'uploader_id': 'inwerwm',
            'tags': 'count:1',
            'like_count': 6133,
            'view_count': 1050343,
            'comment_count': 1,
            'timestamp': 1677843869,
            'modified_timestamp': 1679056362,
        },
        'skip': 'this video cannot be played because of migration',
    }, {
        'url': 'https://iwara.tv/video/1ywe1sbkqwumpdxz5/',
        'md5': '7645f966f069b8ec9210efd9130c9aad',
        'info_dict': {
            'id': '1ywe1sbkqwumpdxz5',
            'ext': 'mp4',
            'age_limit': 18,
            'title': 'Aponia アポニア SEX Party Tonight 手の脱衣 巨乳 ',
            'description': 'md5:3f60016fff22060eef1ef26d430b1f67',
            'uploader': 'Lyu ya',
            'uploader_id': 'user792540',
            'tags': [
                'uncategorized'
            ],
            'like_count': int,
            'view_count': int,
            'comment_count': int,
            'timestamp': 1678732213,
            'modified_timestamp': int,
            'thumbnail': 'https://files.iwara.tv/image/thumbnail/581d12b5-46f4-4f15-beb2-cfe2cde5d13d/thumbnail-00.jpg',
            'modified_date': '20230614',
            'upload_date': '20230313',
        },
    }, {
        'url': 'https://iwara.tv/video/blggmfno8ghl725bg',
        'info_dict': {
            'id': 'blggmfno8ghl725bg',
            'ext': 'mp4',
            'age_limit': 18,
            'title': 'お外でおしっこしちゃう猫耳ロリメイド',
            'description': 'md5:0342ba9bf6db09edbbb28729657c3611',
            'uploader': 'Fe_Kurosabi',
            'uploader_id': 'fekurosabi',
            'tags': [
                'pee'
            ],
            'like_count': int,
            'view_count': int,
            'comment_count': int,
            'timestamp': 1598880567,
            'modified_timestamp': int,
            'upload_date': '20200831',
            'modified_date': '20230605',
            'thumbnail': 'https://files.iwara.tv/image/thumbnail/7693e881-d302-42a4-a780-f16d66b5dadd/thumbnail-00.jpg',
            # 'availability': 'needs_auth',
        },
    }]

    def _extract_formats(self, video_id, fileurl):
        """Yield one format dict per entry of the JSON list served at ``fileurl``.

        The request carries an ``X-Version`` header: the SHA-1 hex digest of
        the last path segment of ``fileurl``, its ``expires`` query value and
        a fixed string, joined with underscores.
        """
        parsed_url = urllib.parse.urlparse(fileurl)
        query_args = urllib.parse.parse_qs(parsed_url.query)
        last_segment = parsed_url.path.rstrip('/').split('/')[-1]
        # https://github.com/yt-dlp/yt-dlp/issues/6549#issuecomment-1473771047
        signature_input = '_'.join((last_segment, query_args['expires'][0], '5nFp9kmbNnHdAFhaqMvt'))
        x_version = hashlib.sha1(signature_input.encode()).hexdigest()

        preference = qualities(['preview', '360', '540', 'Source'])

        file_list = self._download_json(fileurl, video_id, headers={'X-Version': x_version})
        # The same field map is applied to every entry of the list.
        format_fields = {
            'format_id': 'name',
            'url': ('src', ('view', 'download'), {self._proto_relative_url}),
            'ext': ('type', {mimetype2ext}),
            'quality': ('name', {preference}),
            'height': ('name', {int_or_none}),
        }
        for entry in file_list:
            yield traverse_obj(entry, format_fields, get_all=False)

    def _real_extract(self, url):
        """Fetch the video metadata from the API and build the info dict.

        Hands off to ``url_result`` when the API response has an ``embedUrl``
        but no ``fileUrl``.

        Raises:
            ExtractorError: when the API reports an error message that is not
                handled as a login requirement, or when the response has
                neither ``fileUrl`` nor ``embedUrl``.
        """
        video_id = self._match_id(url)
        username, _ = self._get_login_info()
        api_url = f'https://api.iwara.tv/video/{video_id}'
        auth_headers = self._get_media_token()
        video_data = self._download_json(
            api_url, video_id, expected_status=lambda x: True, headers=auth_headers)

        errmsg = video_data.get('message')
        # at this point we can actually get uploaded user info, but do we need it?
        if errmsg == 'errors.privateVideo':
            self.raise_login_required('Private video. Login if you have permissions to watch', method='password')
        elif errmsg == 'errors.notFound' and not username:
            self.raise_login_required('Video may need login to view', method='password')
        elif errmsg:  # None if success
            raise ExtractorError(f'Iwara says: {errmsg}')

        file_url = video_data.get('fileUrl')
        if not file_url:
            embed_url = video_data.get('embedUrl')
            if embed_url:
                return self.url_result(embed_url)
            raise ExtractorError('This video is unplayable', expected=True)

        # ecchi is 'sexy' in Japanese
        age_limit = 18 if video_data.get('rating') == 'ecchi' else 0
        metadata = traverse_obj(video_data, {
            'title': 'title',
            'description': 'body',
            'uploader': ('user', 'name'),
            'uploader_id': ('user', 'username'),
            'tags': ('tags', ..., 'id'),
            'like_count': 'numLikes',
            'view_count': 'numViews',
            'comment_count': 'numComments',
            'timestamp': ('createdAt', {unified_timestamp}),
            'modified_timestamp': ('updatedAt', {unified_timestamp}),
            'thumbnail': ('file', 'id', {str}, {
                lambda x: f'https://files.iwara.tv/image/thumbnail/{x}/thumbnail-00.jpg'}),
        })

        return {
            'id': video_id,
            'age_limit': age_limit,
            **metadata,
            'formats': list(self._extract_formats(video_id, file_url)),
        }
204
205
class IwaraUserIE(IwaraBaseIE):
    """Extractor that turns an iwara.tv profile URL into a paged playlist."""

    IE_NAME = 'iwara:user'
    _VALID_URL = r'https?://(?:www\.)?iwara\.tv/profile/(?P<id>[^/?#&]+)'
    _PER_PAGE = 32

    _TESTS = [{
        'url': 'https://iwara.tv/profile/user792540/videos',
        'info_dict': {
            'id': 'user792540',
            'title': 'Lyu ya',
        },
        'playlist_mincount': 70,
    }, {
        'url': 'https://iwara.tv/profile/theblackbirdcalls/videos',
        'info_dict': {
            'id': 'theblackbirdcalls',
            'title': 'TheBlackbirdCalls',
        },
        'playlist_mincount': 723,
    }, {
        'url': 'https://iwara.tv/profile/user792540',
        'only_matching': True,
    }, {
        'url': 'https://iwara.tv/profile/theblackbirdcalls',
        'only_matching': True,
    }, {
        'url': 'https://www.iwara.tv/profile/lumymmd',
        'info_dict': {
            'id': 'lumymmd',
            'title': 'Lumy MMD',
        },
        'playlist_mincount': 1,
    }]

    def _entries(self, playlist_id, user_id, page):
        """Yield a ``url_result`` for each video id on one page of the user's listing."""
        page_query = {
            'page': page,
            'sort': 'date',
            'user': user_id,
            'limit': self._PER_PAGE,
        }
        listing = self._download_json(
            'https://api.iwara.tv/videos', playlist_id,
            note=f'Downloading page {page}',
            query=page_query, headers=self._get_media_token())
        yield from (
            self.url_result(f'https://iwara.tv/video/{video_id}')
            for video_id in traverse_obj(listing, ('results', ..., 'id')))

    def _real_extract(self, url):
        """Look up the profile, then return a lazily paged playlist of its videos."""
        playlist_id = self._match_id(url)
        user_info = self._download_json(
            f'https://api.iwara.tv/profile/{playlist_id}', playlist_id,
            note='Requesting user info')
        user_id = traverse_obj(user_info, ('user', 'id'))

        # Pages are only fetched when the paged list asks for them.
        fetch_page = functools.partial(self._entries, playlist_id, user_id)
        paged_videos = OnDemandPagedList(fetch_page, self._PER_PAGE)
        display_name = traverse_obj(user_info, ('user', 'name'))
        return self.playlist_result(paged_videos, playlist_id, display_name)
265
266
class IwaraPlaylistIE(IwaraBaseIE):
    """Extractor that turns an iwara.tv playlist URL into a paged playlist."""

    _VALID_URL = r'https?://(?:www\.)?iwara\.tv/playlist/(?P<id>[0-9a-f-]+)'
    IE_NAME = 'iwara:playlist'
    _PER_PAGE = 32

    _TESTS = [{
        'url': 'https://iwara.tv/playlist/458e5486-36a4-4ac0-b233-7e9eef01025f',
        'info_dict': {
            'id': '458e5486-36a4-4ac0-b233-7e9eef01025f',
        },
        'playlist_mincount': 3,
    }]

    def _entries(self, playlist_id, first_page, page):
        """Yield a ``url_result`` for each video id on the requested page.

        Page 0 reuses ``first_page``, which ``_real_extract`` has already
        downloaded; any other page is requested from the API.
        """
        if page:
            # Request later pages from the same playlist endpoint as page 0.
            # The previous code queried the generic /videos listing with only
            # page/limit, so the playlist id was never part of the request.
            videos = self._download_json(
                f'https://api.iwara.tv/playlist/{playlist_id}', playlist_id,
                f'Downloading page {page}',
                query={'page': page, 'limit': self._PER_PAGE},
                headers=self._get_media_token())
        else:
            videos = first_page
        for x in traverse_obj(videos, ('results', ..., 'id')):
            yield self.url_result(f'https://iwara.tv/video/{x}')

    def _real_extract(self, url):
        """Download the first playlist page and return a lazily paged playlist."""
        playlist_id = self._match_id(url)
        page_0 = self._download_json(
            f'https://api.iwara.tv/playlist/{playlist_id}?page=0&limit={self._PER_PAGE}', playlist_id,
            note='Requesting playlist info', headers=self._get_media_token())

        return self.playlist_result(
            OnDemandPagedList(
                functools.partial(self._entries, playlist_id, page_0),
                self._PER_PAGE),
            playlist_id, traverse_obj(page_0, ('title', 'name')))