]>
Commit | Line | Data |
---|---|---|
65de7d20 | 1 | import functools |
3bf57053 PH |
2 | import re |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
65de7d20 SS |
6 | ExtractorError, |
7 | determine_ext, | |
8 | float_or_none, | |
3bf57053 PH |
9 | int_or_none, |
10 | js_to_json, | |
11 | mimetype2ext, | |
65de7d20 SS |
12 | parse_iso8601, |
13 | str_or_none, | |
14 | strip_or_none, | |
15 | traverse_obj, | |
16 | url_or_none, | |
3bf57053 PH |
17 | ) |
18 | ||
b88ba053 | 19 | |
65de7d20 SS |
class ImgurBaseIE(InfoExtractor):
    # Shared helpers for the Imgur extractors in this module: the API client id,
    # a shortcut for delegating an item to ImgurIE, the API call wrapper and the
    # description filter.

    # Sent as the `client_id` query parameter of every API request (see _call_api)
    _CLIENT_ID = '546c25a59c58ad7'

    @classmethod
    def _imgur_result(cls, item_id):
        """Return a url_result for https://imgur.com/<item_id>, to be handled by ImgurIE."""
        return cls.url_result(f'https://imgur.com/{item_id}', ImgurIE, item_id)

    def _call_api(self, endpoint, video_id, **kwargs):
        """Download JSON from the `post/v1/<endpoint>/<video_id>` API route.

        The request always asks for the `media` and `account` includes.
        Extra keyword arguments are forwarded unchanged to `_download_json`.
        """
        return self._download_json(
            f'https://api.imgur.com/post/v1/{endpoint}/{video_id}?client_id={self._CLIENT_ID}&include=media,account',
            video_id, **kwargs)

    @staticmethod
    def get_description(s):
        """Return `s` as a description, or None when it carries no information.

        None is returned for any falsy value (None, empty string) and for text
        containing the 'Discover the magic of the internet at Imgur' blurb
        (presumably the site's generic default description - verify).
        """
        # Check for falsy input first so that None does not reach the `in` test,
        # which would raise TypeError
        if not s or 'Discover the magic of the internet at Imgur' in s:
            return None
        return s
37 | ||
38 | ||
class ImgurIE(ImgurBaseIE):
    # Extractor for a single Imgur media item (imgur.com/<id> or i.imgur.com/<id>);
    # album, gallery, topic and subreddit style URLs are excluded by the lookahead.
    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?P<id>[a-zA-Z0-9]+)'

    _TESTS = [{
        'url': 'https://imgur.com/A61SaA1',
        'info_dict': {
            'id': 'A61SaA1',
            'ext': 'mp4',
            'title': 'MRW gifv is up and running without any bugs',
            'timestamp': 1416446068,
            'upload_date': '20141120',
            'release_timestamp': 1416446068,
            'release_date': '20141120',
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg',
        },
    }, {
        'url': 'https://i.imgur.com/A61SaA1.gifv',
        'only_matching': True,
    }, {
        'url': 'https://i.imgur.com/crGpqCV.mp4',
        'only_matching': True,
    }, {
        'url': 'https://i.imgur.com/jxBXAMC.gifv',
        'info_dict': {
            'id': 'jxBXAMC',
            'ext': 'mp4',
            'title': 'Fahaka puffer feeding',
            'timestamp': 1533835503,
            'upload_date': '20180809',
            'release_timestamp': 1533835503,
            'release_date': '20180809',
            'duration': 30.0,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'thumbnail': 'https://i.imgur.com/jxBXAMCh.jpg',
        },
    }, {
        # needs Accept header, ref: https://github.com/yt-dlp/yt-dlp/issues/9458
        'url': 'https://imgur.com/zV03bd5',
        'md5': '59df97884e8ba76143ff6b640a0e2904',
        'info_dict': {
            'id': 'zV03bd5',
            'ext': 'mp4',
            'title': 'Ive - Liz',
            'timestamp': 1710491255,
            'upload_date': '20240315',
            'release_timestamp': 1710491255,
            'release_date': '20240315',
            'duration': 56.92,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
        },
    }]

    def _real_extract(self, url):
        """Extract one video or animated image.

        Fetches the item's API record, rejects items that are neither a video
        nor animated, then gathers formats from the API record, the
        `<id>.gifv` page (<source> tags and the `videoItem` GIF entry) and the
        page's twitter player meta tags. Metadata comes from the page's
        OpenGraph tags, overridden by the API record where present.
        """
        video_id = self._match_id(url)
        data = self._call_api('media', video_id)

        # Truthy if the first media entry is a video, or is flagged as animated
        playable = traverse_obj(data, ('media', 0, (
            ('type', {lambda media_type: media_type == 'video' or None}),
            ('metadata', 'is_animated'))), get_all=False)
        if not playable:
            raise ExtractorError(f'{video_id} is not a video or animated image', expected=True)

        # The page is optional: a failed download degrades to an empty string
        webpage = self._download_webpage(
            f'https://i.imgur.com/{video_id}.gifv', video_id, fatal=False) or ''

        formats = []

        # Format described directly by the API record
        api_format = traverse_obj(data, ('media', 0, {
            'url': ('url', {url_or_none}),
            'ext': ('ext', {str}),
            'width': ('width', {int_or_none}),
            'height': ('height', {int_or_none}),
            'filesize': ('size', {int_or_none}),
            'acodec': ('metadata', 'has_sound', {lambda has_sound: None if has_sound else 'none'}),
        }))
        api_url = api_format.get('url')
        if api_url:
            if not api_format.get('ext'):
                mime_type = traverse_obj(data, ('media', 0, 'mime_type'))
                api_format['ext'] = mimetype2ext(mime_type) or determine_ext(api_url)
            if traverse_obj(data, ('media', 0, 'type')) == 'image':
                api_format['acodec'] = 'none'
            api_format.setdefault('preference', -10)
            formats.append(api_format)

        elements_html = self._search_regex(
            r'(?s)<div class="video-elements">(.*?)</div>',
            webpage, 'video elements', default=None)

        if elements_html:
            def og_dimensions(media_type):
                # Width/height as advertised by the og:<media_type>:* properties
                dimensions = {}
                for prop in ('width', 'height'):
                    dimensions[prop] = int_or_none(self._og_search_property(
                        f'{media_type}:{prop}', webpage, default=None))
                return dimensions

            size = og_dimensions('video')
            if not any(size.values()):
                size = og_dimensions('image')

            # NOTE(review): this rebinds `formats`, so the API-derived format
            # appended above is dropped whenever the page has video elements -
            # confirm that this is intended
            source_tags = re.finditer(
                r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', elements_html)
            formats = traverse_obj(source_tags, (..., {
                'format_id': ('type', {lambda mime: mime.partition('/')[2]}),
                'url': ('src', {self._proto_relative_url}),
                'ext': ('type', {mimetype2ext}),
            }))
            for fmt in formats:
                fmt.update(size)

            # We can get the original gif format from the webpage as well
            video_item = self._search_json(
                r'var\s+videoItem\s*=', webpage, 'GIF info', video_id,
                transform_source=js_to_json, fatal=False)
            gif_format = traverse_obj(video_item, {
                'url': ('gifUrl', {self._proto_relative_url}),
                'filesize': ('size', {int_or_none}),
            })
            if gif_format:
                gif_format.update({
                    **size,
                    'format_id': 'gif',
                    'preference': -10,  # gifs < videos
                    'ext': 'gif',
                    'acodec': 'none',
                    'vcodec': 'gif',
                    'container': 'gif',
                })
                formats.append(gif_format)

        meta = functools.partial(self._html_search_meta, html=webpage, default=None)

        twitter_url = url_or_none(meta('twitter:player:stream'))
        if twitter_url:
            formats.append({
                'format_id': 'twitter',
                'url': twitter_url,
                'ext': mimetype2ext(meta('twitter:player:stream:content_type')),
                'width': int_or_none(meta('twitter:width')),
                'height': int_or_none(meta('twitter:height')),
            })

        if not formats:
            self.raise_no_formats(
                f'No sources found for video {video_id}. Maybe a plain image?', expected=True)
        self._remove_duplicate_formats(formats)

        # Post-level fields; for `timestamp` the first usable of
        # updated_at/created_at wins (get_all=False)
        post_info = traverse_obj(data, {
            'uploader_id': ('account_id', {lambda a: str(a) if int_or_none(a) else None}),
            'uploader': ('account', 'username', {lambda x: strip_or_none(x) or None}),
            'uploader_url': ('account', 'avatar_url', {url_or_none}),
            'like_count': ('upvote_count', {int_or_none}),
            'dislike_count': ('downvote_count', {int_or_none}),
            'comment_count': ('comment_count', {int_or_none}),
            'age_limit': ('is_mature', {lambda x: 18 if x else None}),
            'timestamp': (('updated_at', 'created_at'), {parse_iso8601}),
            'release_timestamp': ('created_at', {parse_iso8601}),
        }, get_all=False)

        # Fields from the first media entry's own metadata
        media_info = traverse_obj(data, ('media', 0, 'metadata', {
            'title': ('title', {lambda x: strip_or_none(x) or None}),
            'description': ('description', {self.get_description}),
            'duration': ('duration', {float_or_none}),
            'timestamp': (('updated_at', 'created_at'), {parse_iso8601}),
            'release_timestamp': ('created_at', {parse_iso8601}),
        }), get_all=False)

        # Later entries override earlier ones: page tags < post fields < media metadata
        return {
            'title': self._og_search_title(webpage, default=None),
            'description': self.get_description(self._og_search_description(webpage, default='')),
            **post_info,
            **media_info,
            'id': video_id,
            'formats': formats,
            'thumbnail': url_or_none(meta('thumbnailUrl')),
            'http_headers': {'Accept': '*/*'},
        }
8875b3d5 S |
214 | |
215 | ||
65de7d20 SS |
class ImgurGalleryBaseIE(ImgurBaseIE):
    # Common extraction logic for gallery-like and album URLs.
    # When _GALLERY is true and the album reports exactly one image, that single
    # media item is returned with the album's metadata applied to it.
    _GALLERY = True

    def _real_extract(self, url):
        """Resolve a gallery/album id to a playlist or to a single ImgurIE result.

        Albums become a playlist of their video/animated media ids (or a single
        transparent result, see _GALLERY). Anything else is delegated to
        ImgurIE under the matched id, with the title/description found here
        applied on top.
        """
        gallery_id = self._match_id(url)

        # Non-fatal request; a 404 response is declared as an expected status
        data = self._call_api('albums', gallery_id, fatal=False, expected_status=404)

        info = traverse_obj(data, {
            'title': ('title', {lambda x: strip_or_none(x) or None}),
            'description': ('description', {self.get_description}),
        })

        if traverse_obj(data, 'is_album'):

            def yield_media_ids():
                # ids of media entries that are videos or flagged as animated
                yield from traverse_obj(data, (
                    'media', lambda _, v: v.get('type') == 'video' or v['metadata']['is_animated'],
                    'id', {lambda x: str_or_none(x) or None}))

            # if a gallery with exactly one video, apply album metadata to video
            media_id = None
            if self._GALLERY and traverse_obj(data, ('image_count', {lambda c: c == 1})):
                media_id = next(yield_media_ids(), None)

            if not media_id:
                playlist = self.playlist_result(
                    map(self._imgur_result, yield_media_ids()), gallery_id)
                playlist.update(info)
                return playlist
            gallery_id = media_id

        # Single item: let ImgurIE extract it while keeping the metadata gathered here
        info['_type'] = 'url_transparent'
        single = self._imgur_result(gallery_id)
        single.update(info)
        return single
254 | ||
255 | ||
class ImgurGalleryIE(ImgurGalleryBaseIE):
    # Gallery-style URLs: /gallery/<id>, /t/<tag>/<id>, /topic/<name>/<id>, /r/<sub>/<id>.
    # Extraction itself is inherited unchanged from ImgurGalleryBaseIE.
    IE_NAME = 'imgur:gallery'
    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?P<id>[a-zA-Z0-9]+)'

    _TESTS = [{
        'url': 'http://imgur.com/gallery/Q95ko',
        'info_dict': {
            'id': 'Q95ko',
            'title': 'Adding faces make every GIF better',
        },
        'playlist_count': 25,
        'skip': 'Zoinks! You\'ve taken a wrong turn.',
    }, {
        # TODO: static images - replace with animated/video gallery
        'url': 'http://imgur.com/topic/Aww/ll5Vk',
        'only_matching': True,
    }, {
        'url': 'https://imgur.com/gallery/YcAQlkx',
        'add_ies': ['Imgur'],
        'info_dict': {
            'id': 'YcAQlkx',
            'ext': 'mp4',
            'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
            'timestamp': 1358554297,
            'upload_date': '20130119',
            'release_timestamp': 1358554297,
            'release_date': '20130119',
            'uploader': 'wittyusernamehere',
            'uploader_id': '1648642',
            'uploader_url': 'https://i.imgur.com/u3R4I2S_d.png?maxwidth=290&fidelity=grand',
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg',
        },
    }, {
        # TODO: static image - replace with animated/video gallery
        'url': 'http://imgur.com/topic/Funny/N8rOudd',
        'only_matching': True,
    }, {
        'url': 'http://imgur.com/r/aww/VQcQPhM',
        'add_ies': ['Imgur'],
        'info_dict': {
            'id': 'VQcQPhM',
            'ext': 'mp4',
            'title': 'The boss is here',
            'timestamp': 1476494751,
            'upload_date': '20161015',
            'release_timestamp': 1476494751,
            'release_date': '20161015',
            'uploader': 'thematrixcam',
            'uploader_id': '19138530',
            'uploader_url': 'https://i.imgur.com/qCjr5Pi_d.png?maxwidth=290&fidelity=grand',
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'thumbnail': 'https://i.imgur.com/VQcQPhMh.jpg',
        },
    }, {
        # from https://github.com/ytdl-org/youtube-dl/pull/16674
        'url': 'https://imgur.com/t/unmuted/6lAn9VQ',
        'info_dict': {
            'id': '6lAn9VQ',
            'title': 'Penguins !',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://imgur.com/t/unmuted/kx2uD3C',
        'add_ies': ['Imgur'],
        'info_dict': {
            'id': 'ZVMv45i',
            'ext': 'mp4',
            'title': 'Intruder',
            'timestamp': 1528129683,
            'upload_date': '20180604',
            'release_timestamp': 1528129683,
            'release_date': '20180604',
            'duration': 30.03,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'thumbnail': 'https://i.imgur.com/ZVMv45ih.jpg',
        },
    }, {
        'url': 'https://imgur.com/t/unmuted/wXSK0YH',
        'add_ies': ['Imgur'],
        'info_dict': {
            'id': 'JCAP4io',
            'ext': 'mp4',
            'title': 're:I got the blues$',
            'description': 'Luka’s vocal stylings.\n\nFP edit: don’t encourage me. I’ll never stop posting Luka and friends.',
            'timestamp': 1527809525,
            'upload_date': '20180531',
            'release_timestamp': 1527809525,
            'release_date': '20180531',
            'duration': 30.03,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'thumbnail': 'https://i.imgur.com/JCAP4ioh.jpg',
        },
    }]
8875b3d5 | 359 | |
5f47a60c | 360 | |
class ImgurAlbumIE(ImgurGalleryBaseIE):
    # Album URLs (/a/<id>). _GALLERY is disabled, so the inherited extraction
    # never collapses a one-image album into a single transparent result.
    IE_NAME = 'imgur:album'
    _VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
    _GALLERY = False

    _TESTS = [{
        # TODO: only static images - replace with animated/video gallery
        'url': 'http://imgur.com/a/j6Orj',
        'only_matching': True,
    }, {
        # from https://github.com/ytdl-org/youtube-dl/pull/21693
        'url': 'https://imgur.com/a/iX265HX',
        'info_dict': {
            'id': 'iX265HX',
            'title': 'enen-no-shouboutai',
        },
        'playlist_count': 2,
    }, {
        'url': 'https://imgur.com/a/8pih2Ed',
        'info_dict': {
            'id': '8pih2Ed',
        },
        'playlist_mincount': 1,
    }]