]>
Commit | Line | Data |
---|---|---|
aed945e1 | 1 | import json |
2 | import urllib.error | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
6 | ExtractorError, | |
7 | format_field, | |
8 | parse_iso8601, | |
9 | traverse_obj, | |
10 | url_or_none, | |
11 | ) | |
12 | ||
13 | ||
class WykopBaseExtractor(InfoExtractor):
    """Shared API plumbing and metadata mapping for the Wykop extractors."""

    def _get_token(self, force_refresh=False):
        """Return the bearer token used to authorize API requests.

        Unless `force_refresh` is true, a truthy value loaded from the
        ('wykop', 'bearer') cache entry is returned as-is. Otherwise the
        `auth` endpoint is called with the key/secret pair below, and the
        `data.token` field of its response is stored in the cache and returned.
        """
        if not force_refresh:
            maybe_cached = self.cache.load('wykop', 'bearer')
            if maybe_cached:
                return maybe_cached

        new_token = traverse_obj(
            self._do_call_api('auth', None, 'Downloading anonymous auth token', data={
                # hardcoded in frontend
                'key': 'w53947240748',
                'secret': 'd537d9e0a7adc1510842059ae5316419',
            }), ('data', 'token'))

        self.cache.store('wykop', 'bearer', new_token)
        return new_token

    def _do_call_api(self, path, video_id, note='Downloading JSON metadata', data=None, headers=None):
        """Download JSON from `https://wykop.pl/api/v3/{path}`.

        When `data` is truthy it is wrapped as `{'data': data}`, JSON-encoded
        to bytes and passed on together with a JSON `Content-Type` header.
        `headers` is optional; the caller's mapping is never modified.
        """
        # Work on a private copy: the previous `headers={}` default was a single
        # dict shared by every call, so adding Content-Type below leaked into
        # later calls and into any dict the caller passed in.
        headers = dict(headers or {})
        if data:
            data = json.dumps({'data': data}).encode()
            headers['Content-Type'] = 'application/json'

        return self._download_json(
            f'https://wykop.pl/api/v3/{path}', video_id,
            note=note, data=data, headers=headers)

    def _call_api(self, path, video_id, note='Downloading JSON metadata'):
        """Call the API with a bearer token, refreshing the token once on HTTP 403.

        If the first attempt raises an ExtractorError whose cause is an
        HTTPError with code 403, a fresh token is requested and the call is
        retried. Any other error, or a failure of the retry, is re-raised.
        """
        token = self._get_token()
        for retrying in range(2):
            try:
                return self._do_call_api(path, video_id, note, headers={'Authorization': f'Bearer {token}'})
            except ExtractorError as e:
                # Only the first attempt (retrying == 0) may trigger a token refresh
                if not retrying and isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 403:
                    token = self._get_token(True)
                    continue
                raise

    def _common_data_extract(self, data):
        """Build the info-dict fields shared by all Wykop item types.

        `data` is the mapping taken from the `data` key of an API response;
        the result is a `url_transparent` entry that subclasses extend with
        `id`, `title` and `description`.
        """
        author = traverse_obj(data, ('author', 'username'), expected_type=str)

        return {
            '_type': 'url_transparent',
            'display_id': data.get('slug'),
            'url': traverse_obj(data,
                                ('media', 'embed', 'url'),  # what gets an iframe embed
                                ('source', 'url'),  # clickable url (dig only)
                                expected_type=url_or_none),
            'thumbnail': traverse_obj(
                data, ('media', 'photo', 'url'), ('media', 'embed', 'thumbnail'), expected_type=url_or_none),
            'uploader': author,
            'uploader_id': author,
            'uploader_url': format_field(author, None, 'https://wykop.pl/ludzie/%s'),
            'timestamp': parse_iso8601(data.get('created_at'), delimiter=' '),  # time it got submitted
            'like_count': traverse_obj(data, ('votes', 'up'), expected_type=int),
            'dislike_count': traverse_obj(data, ('votes', 'down'), expected_type=int),
            'comment_count': traverse_obj(data, ('comments', 'count'), expected_type=int),
            'age_limit': 18 if data.get('adult') else 0,
            'tags': data.get('tags'),
        }
73 | ||
74 | ||
class WykopDigIE(WykopBaseExtractor):
    """Extractor for wykop.pl link ("dig") pages."""

    IE_NAME = 'wykop:dig'
    _VALID_URL = r'https?://(?:www\.)?wykop\.pl/link/(?P<id>\d+)'

    _TESTS = [
        {
            'url': 'https://wykop.pl/link/6912923/najbardziej-zrzedliwy-kot-na-swiecie-i-frozen-planet-ii-i-bbc-earth',
            'info_dict': {
                'id': 'rlSTBvViflc',
                'ext': 'mp4',
                'title': 'Najbardziej zrzędliwy kot na świecie I Frozen Planet II I BBC Earth',
                'display_id': 'najbardziej-zrzedliwy-kot-na-swiecie-i-frozen-planet-ii-i-bbc-earth',
                'description': 'md5:ac0f87dea1cdcb6b0c53f3612a095c87',
                'tags': ['zwierzaczki', 'koty', 'smiesznykotek', 'humor', 'rozrywka', 'ciekawostki'],
                'age_limit': 0,
                'timestamp': 1669154480,
                'release_timestamp': 1669194241,
                'release_date': '20221123',
                'uploader': 'starnak',
                'uploader_id': 'starnak',
                'uploader_url': 'https://wykop.pl/ludzie/starnak',
                'like_count': int,
                'dislike_count': int,
                'comment_count': int,
                'thumbnail': r're:https?://wykop\.pl/cdn/.+',
                'view_count': int,
                'channel': 'BBC Earth',
                'channel_id': 'UCwmZiChSryoWQCZMIQezgTg',
                'channel_url': 'https://www.youtube.com/channel/UCwmZiChSryoWQCZMIQezgTg',
                'categories': ['Pets & Animals'],
                'upload_date': '20220923',
                'duration': 191,
                'channel_follower_count': int,
                'availability': 'public',
                'live_status': 'not_live',
                'playable_in_embed': True,
            },
        },
    ]

    @classmethod
    def suitable(cls, url):
        """Accept URLs matching `_VALID_URL` that WykopDigCommentIE does not claim."""
        matched = cls._match_valid_url(url)
        if not matched:
            return matched
        return not WykopDigCommentIE.suitable(url)

    def _real_extract(self, url):
        """Fetch the link from the `links/{id}` endpoint and build its info dict."""
        link_id = self._match_id(url)
        response = self._call_api(f'links/{link_id}', link_id)
        link = response['data']

        info = self._common_data_extract(link)
        info.update({
            'id': link_id,
            'title': link['title'],
            'description': link.get('description'),
            # time it got "digged" to the homepage
            'release_timestamp': parse_iso8601(link.get('published_at'), delimiter=' '),
        })
        return info
129 | ||
130 | ||
class WykopDigCommentIE(WykopBaseExtractor):
    """Extractor for a single comment under a wykop.pl link ("dig")."""

    IE_NAME = 'wykop:dig:comment'
    _VALID_URL = r'https?://(?:www\.)?wykop\.pl/link/(?P<dig_id>\d+)/[^/]+/komentarz/(?P<id>\d+)'

    _TESTS = [
        {
            'url': 'https://wykop.pl/link/6992589/strollowal-oszusta-przez-ponad-24-minuty-udawal-naiwniaka-i-nagral-rozmowe/komentarz/114540527/podobna-sytuacja-ponizej-ciekawa-dyskusja-z-oszustem-na-sam-koniec-sam-bylem-w-biurze-swiadkiem-podobnej-rozmowy-niemal-zakonczonej-sukcesem-bandyty-g',
            'info_dict': {
                'id': 'u6tEi2FmKZY',
                'ext': 'mp4',
                'title': 'md5:e7c741c5baa7ed6478000caf72865577',
                'display_id': 'md5:45b2d12bd0e262d09cc7cf7abc8412db',
                'description': 'md5:bcec7983429f9c0630f9deb9d3d1ba5e',
                'timestamp': 1674476945,
                'uploader': 'Bartholomew',
                'uploader_id': 'Bartholomew',
                'uploader_url': 'https://wykop.pl/ludzie/Bartholomew',
                'thumbnail': r're:https?://wykop\.pl/cdn/.+',
                'tags': [],
                'availability': 'public',
                'duration': 1838,
                'upload_date': '20230117',
                'categories': ['Entertainment'],
                'view_count': int,
                'like_count': int,
                'dislike_count': int,
                'comment_count': int,
                'channel_follower_count': int,
                'playable_in_embed': True,
                'live_status': 'not_live',
                'age_limit': 0,
                'chapters': 'count:3',
                'channel': 'Poszukiwacze Okazji',
                'channel_id': 'UCzzvJDZThwv06dR4xmzrZBw',
                'channel_url': 'https://www.youtube.com/channel/UCzzvJDZThwv06dR4xmzrZBw',
            },
        },
    ]

    def _real_extract(self, url):
        """Fetch the comment from `links/{dig_id}/comments/{id}` and build its info dict."""
        dig_id, comment_id = self._search_regex(
            self._VALID_URL, url, 'dig and comment ids', group=('dig_id', 'id'))
        endpoint = f'links/{dig_id}/comments/{comment_id}'
        comment = self._call_api(endpoint, comment_id)['data']

        info = self._common_data_extract(comment)
        username = traverse_obj(comment, ('author', 'username'))
        text = comment.get('content')
        info.update({
            'id': comment_id,
            # the title is "<author> - <comment text>", with empty text when there is none
            'title': f"{username} - {text or ''}",
            'description': text,
        })
        return info
178 | ||
179 | ||
class WykopPostIE(WykopBaseExtractor):
    """Extractor for wykop.pl posts (`/wpis/` pages)."""

    IE_NAME = 'wykop:post'
    _VALID_URL = r'https?://(?:www\.)?wykop\.pl/wpis/(?P<id>\d+)'

    _TESTS = [
        {
            'url': 'https://wykop.pl/wpis/68893343/kot-koty-smiesznykotek',
            'info_dict': {
                'id': 'PL8JMjiUPHUhwc9ZlKa_5IFeBwBV8Xe7jI',
                'title': 'PawelW124 - #kot #koty #smiesznykotek',
                'description': '#kot #koty #smiesznykotek',
                'display_id': 'kot-koty-smiesznykotek',
                'tags': ['kot', 'koty', 'smiesznykotek'],
                'uploader': 'PawelW124',
                'uploader_id': 'PawelW124',
                'uploader_url': 'https://wykop.pl/ludzie/PawelW124',
                'timestamp': 1668938142,
                'age_limit': 0,
                'like_count': int,
                'dislike_count': int,
                'thumbnail': r're:https?://wykop\.pl/cdn/.+',
                'comment_count': int,
                'channel': 'Revan',
                'channel_id': 'UCW9T_-uZoiI7ROARQdTDyOw',
                'channel_url': 'https://www.youtube.com/channel/UCW9T_-uZoiI7ROARQdTDyOw',
                'upload_date': '20221120',
                'modified_date': '20220814',
                'availability': 'public',
                'view_count': int,
            },
            'playlist_mincount': 15,
            'params': {
                'flat_playlist': True,
            },
        },
    ]

    @classmethod
    def suitable(cls, url):
        """Accept URLs matching `_VALID_URL` that WykopPostCommentIE does not claim."""
        matched = cls._match_valid_url(url)
        if not matched:
            return matched
        return not WykopPostCommentIE.suitable(url)

    def _real_extract(self, url):
        """Fetch the post from the `entries/{id}` endpoint and build its info dict."""
        post_id = self._match_id(url)
        response = self._call_api(f'entries/{post_id}', post_id)
        post = response['data']

        info = self._common_data_extract(post)
        username = traverse_obj(post, ('author', 'username'))
        text = post.get('content')
        info.update({
            'id': post_id,
            # the title is "<author> - <post text>", with empty text when there is none
            'title': f"{username} - {text or ''}",
            'description': text,
        })
        return info
229 | ||
230 | ||
class WykopPostCommentIE(WykopBaseExtractor):
    """Extractor for a single comment under a wykop.pl post."""

    IE_NAME = 'wykop:post:comment'
    _VALID_URL = r'https?://(?:www\.)?wykop\.pl/wpis/(?P<post_id>\d+)/[^/#]+#(?P<id>\d+)'

    _TESTS = [
        {
            'url': 'https://wykop.pl/wpis/70084873/test-test-test#249303979',
            'info_dict': {
                'id': 'confusedquickarmyant',
                'ext': 'mp4',
                'title': 'tpap - treść komentarza',
                'display_id': 'tresc-komentarza',
                'description': 'treść komentarza',
                'uploader': 'tpap',
                'uploader_id': 'tpap',
                'uploader_url': 'https://wykop.pl/ludzie/tpap',
                'timestamp': 1675349470,
                'upload_date': '20230202',
                'tags': [],
                'duration': 2.12,
                'age_limit': 0,
                'categories': [],
                'view_count': int,
                'like_count': int,
                'dislike_count': int,
                'thumbnail': r're:https?://wykop\.pl/cdn/.+',
            },
        },
    ]

    def _real_extract(self, url):
        """Fetch the comment from `entries/{post_id}/comments/{id}` and build its info dict."""
        post_id, comment_id = self._search_regex(
            self._VALID_URL, url, 'post and comment ids', group=('post_id', 'id'))
        endpoint = f'entries/{post_id}/comments/{comment_id}'
        comment = self._call_api(endpoint, comment_id)['data']

        info = self._common_data_extract(comment)
        username = traverse_obj(comment, ('author', 'username'))
        text = comment.get('content')
        info.update({
            'id': comment_id,
            # the title is "<author> - <comment text>", with empty text when there is none
            'title': f"{username} - {text or ''}",
            'description': text,
        })
        return info