]>
Commit | Line | Data |
---|---|---|
65de7d20 | 1 | import functools |
3bf57053 PH |
2 | import re |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
65de7d20 SS |
6 | ExtractorError, |
7 | determine_ext, | |
8 | float_or_none, | |
3bf57053 PH |
9 | int_or_none, |
10 | js_to_json, | |
11 | mimetype2ext, | |
65de7d20 SS |
12 | parse_iso8601, |
13 | str_or_none, | |
14 | strip_or_none, | |
15 | traverse_obj, | |
16 | url_or_none, | |
3bf57053 PH |
17 | ) |
18 | ||
b88ba053 | 19 | |
65de7d20 SS |
class ImgurBaseIE(InfoExtractor):
    """Shared helpers for the Imgur extractors defined in this module."""

    # Sent as the `client_id` query parameter on every API request
    _CLIENT_ID = '546c25a59c58ad7'

    @classmethod
    def _imgur_result(cls, item_id):
        """Return a URL result for `item_id` that is delegated to ImgurIE."""
        return cls.url_result(f'https://imgur.com/{item_id}', ImgurIE, item_id)

    def _call_api(self, endpoint, video_id, **kwargs):
        """Download the JSON for `video_id` from the post/v1 API `endpoint`.

        Any extra keyword arguments are forwarded to `_download_json`.
        """
        return self._download_json(
            f'https://api.imgur.com/post/v1/{endpoint}/{video_id}?client_id={self._CLIENT_ID}&include=media,account',
            video_id, **kwargs)

    @staticmethod
    def get_description(s):
        """Return `s`, or None if it is empty or the site's boilerplate text.

        Falsy input (None, '') yields None instead of raising.
        """
        # Check emptiness first: `'...' in None` would raise TypeError
        if not s or 'Discover the magic of the internet at Imgur' in s:
            return None
        return s
37 | ||
38 | ||
class ImgurIE(ImgurBaseIE):
    """Extractor for a single Imgur video or animated image."""

    # The negative lookahead leaves album/gallery/topic/subreddit style URLs
    # to the other extractors in this module
    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?P<id>[a-zA-Z0-9]+)'

    _TESTS = [{
        'url': 'https://imgur.com/A61SaA1',
        'info_dict': {
            'id': 'A61SaA1',
            'ext': 'mp4',
            'title': 'MRW gifv is up and running without any bugs',
            'timestamp': 1416446068,
            'upload_date': '20141120',
            'dislike_count': int,
            'comment_count': int,
            'release_timestamp': 1416446068,
            'release_date': '20141120',
            'like_count': int,
            'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg',
        },
    }, {
        'url': 'https://i.imgur.com/A61SaA1.gifv',
        'only_matching': True,
    }, {
        'url': 'https://i.imgur.com/crGpqCV.mp4',
        'only_matching': True,
    }, {
        'url': 'https://i.imgur.com/jxBXAMC.gifv',
        'info_dict': {
            'id': 'jxBXAMC',
            'ext': 'mp4',
            'title': 'Fahaka puffer feeding',
            'timestamp': 1533835503,
            'upload_date': '20180809',
            'release_date': '20180809',
            'like_count': int,
            'duration': 30.0,
            'comment_count': int,
            'release_timestamp': 1533835503,
            'thumbnail': 'https://i.imgur.com/jxBXAMCh.jpg',
            'dislike_count': int,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the media item referenced by `url`.

        Formats are gathered from the API media entry, the `<source>` tags and
        `videoItem` JSON of the `.gifv` page, and the twitter player meta tags.

        Raises ExtractorError (expected) when the first API media entry is
        neither of type 'video' nor flagged as animated.
        """
        video_id = self._match_id(url)
        data = self._call_api('media', video_id)
        # Accept the item only if media[0] is a video or has a truthy is_animated flag
        if not traverse_obj(data, ('media', 0, (
                ('type', {lambda t: t == 'video' or None}),
                ('metadata', 'is_animated'))), get_all=False):
            raise ExtractorError(f'{video_id} is not a video or animated image', expected=True)
        # The gifv page is optional; an empty string keeps the searches below harmless
        webpage = self._download_webpage(
            f'https://i.imgur.com/{video_id}.gifv', video_id, fatal=False) or ''
        formats = []

        # Format described directly by the API media entry
        media_fmt = traverse_obj(data, ('media', 0, {
            'url': ('url', {url_or_none}),
            'ext': ('ext', {str}),
            'width': ('width', {int_or_none}),
            'height': ('height', {int_or_none}),
            'filesize': ('size', {int_or_none}),
            'acodec': ('metadata', 'has_sound', {lambda b: None if b else 'none'}),
        }))
        media_url = media_fmt.get('url')
        if media_url:
            if not media_fmt.get('ext'):
                media_fmt['ext'] = mimetype2ext(traverse_obj(
                    data, ('media', 0, 'mime_type'))) or determine_ext(media_url)
            if traverse_obj(data, ('media', 0, 'type')) == 'image':
                media_fmt['acodec'] = 'none'
            media_fmt.setdefault('preference', -10)
            formats.append(media_fmt)

        video_elements = self._search_regex(
            r'(?s)<div class="video-elements">(.*?)</div>',
            webpage, 'video elements', default=None)

        if video_elements:
            def og_get_size(media_type):
                return {
                    p: int_or_none(self._og_search_property(f'{media_type}:{p}', webpage, default=None))
                    for p in ('width', 'height')
                }

            # Prefer the og:video dimensions, fall back to og:image
            size = og_get_size('video')
            if not any(size.values()):
                size = og_get_size('image')

            page_formats = traverse_obj(
                re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements),
                (..., {
                    'format_id': ('type', {lambda s: s.partition('/')[2]}),
                    'url': ('src', {self._proto_relative_url}),
                    'ext': ('type', {mimetype2ext}),
                }))
            for f in page_formats:
                f.update(size)

            # We can get the original gif format from the webpage as well
            gif_json = traverse_obj(self._search_json(
                r'var\s+videoItem\s*=', webpage, 'GIF info', video_id,
                transform_source=js_to_json, fatal=False), {
                    'url': ('gifUrl', {self._proto_relative_url}),
                    'filesize': ('size', {int_or_none}),
            })
            if gif_json:
                gif_json.update(size)
                gif_json.update({
                    'format_id': 'gif',
                    'preference': -10,  # gifs < videos
                    'ext': 'gif',
                    'acodec': 'none',
                    'vcodec': 'gif',
                    'container': 'gif',
                })
                page_formats.append(gif_json)

            # Insert the page formats in front of the API format instead of
            # rebinding `formats`, which used to drop the API format entirely.
            # Being first, page entries are listed ahead of the API entry for
            # the duplicate removal below.
            formats[:0] = page_formats

        search = functools.partial(self._html_search_meta, html=webpage, default=None)

        twitter_fmt = {
            'format_id': 'twitter',
            'url': url_or_none(search('twitter:player:stream')),
            'ext': mimetype2ext(search('twitter:player:stream:content_type')),
            'width': int_or_none(search('twitter:width')),
            'height': int_or_none(search('twitter:height')),
        }
        if twitter_fmt['url']:
            formats.append(twitter_fmt)

        if not formats:
            self.raise_no_formats(
                f'No sources found for video {video_id}. Maybe a plain image?', expected=True)
        self._remove_duplicate_formats(formats)

        # Later entries override earlier ones: webpage values first, then
        # post-level API fields, then the media entry's own metadata
        return {
            'title': self._og_search_title(webpage, default=None),
            'description': self.get_description(self._og_search_description(webpage, default='')),
            **traverse_obj(data, {
                'uploader_id': ('account_id', {lambda a: str(a) if int_or_none(a) else None}),
                'uploader': ('account', 'username', {lambda x: strip_or_none(x) or None}),
                'uploader_url': ('account', 'avatar_url', {url_or_none}),
                'like_count': ('upvote_count', {int_or_none}),
                'dislike_count': ('downvote_count', {int_or_none}),
                'comment_count': ('comment_count', {int_or_none}),
                'age_limit': ('is_mature', {lambda x: 18 if x else None}),
                'timestamp': (('updated_at', 'created_at'), {parse_iso8601}),
                'release_timestamp': ('created_at', {parse_iso8601}),
            }, get_all=False),
            **traverse_obj(data, ('media', 0, 'metadata', {
                'title': ('title', {lambda x: strip_or_none(x) or None}),
                'description': ('description', {self.get_description}),
                'duration': ('duration', {float_or_none}),
                'timestamp': (('updated_at', 'created_at'), {parse_iso8601}),
                'release_timestamp': ('created_at', {parse_iso8601}),
            }), get_all=False),
            'id': video_id,
            'formats': formats,
            'thumbnail': url_or_none(search('thumbnailUrl')),
        }
8875b3d5 S |
196 | |
197 | ||
65de7d20 SS |
class ImgurGalleryBaseIE(ImgurBaseIE):
    """Extraction logic shared by the gallery and album extractors."""

    # When truthy, a post whose image_count is exactly 1 is resolved to that
    # single media item rather than returned as a playlist
    _GALLERY = True

    def _real_extract(self, url):
        """Return a playlist of the post's videos/animations, or a single
        `url_transparent` result carrying the post's title and description."""
        item_id = self._match_id(url)

        # Non-fatal lookup; a 404 response is tolerated
        data = self._call_api('albums', item_id, fatal=False, expected_status=404)

        info = traverse_obj(data, {
            'title': ('title', {lambda x: strip_or_none(x) or None}),
            'description': ('description', {self.get_description}),
        })

        def media_ids():
            # IDs of all media entries that are videos or animated images
            yield from traverse_obj(data, (
                'media', lambda _, v: v.get('type') == 'video' or v['metadata']['is_animated'],
                'id', {lambda x: str_or_none(x) or None}))

        if traverse_obj(data, 'is_album'):
            # if a gallery with exactly one video, apply album metadata to video
            single_id = None
            if self._GALLERY and traverse_obj(data, ('image_count', {lambda c: c == 1})):
                single_id = next(media_ids(), None)

            if not single_id:
                playlist = self.playlist_result(
                    map(self._imgur_result, media_ids()), item_id)
                playlist.update(info)
                return playlist
            item_id = single_id

        entry = self._imgur_result(item_id)
        entry.update(info, _type='url_transparent')
        return entry
236 | ||
237 | ||
class ImgurGalleryIE(ImgurGalleryBaseIE):
    """Extractor for gallery, topic (`/t/`, `/topic/`) and `/r/` post URLs."""

    IE_NAME = 'imgur:gallery'
    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?P<id>[a-zA-Z0-9]+)'

    _TESTS = [
        {
            'url': 'http://imgur.com/gallery/Q95ko',
            'info_dict': {
                'id': 'Q95ko',
                'title': 'Adding faces make every GIF better',
            },
            'playlist_count': 25,
            'skip': "Zoinks! You've taken a wrong turn.",
        },
        {
            # TODO: static images - replace with animated/video gallery
            'url': 'http://imgur.com/topic/Aww/ll5Vk',
            'only_matching': True,
        },
        {
            'url': 'https://imgur.com/gallery/YcAQlkx',
            'add_ies': ['Imgur'],
            'info_dict': {
                'id': 'YcAQlkx',
                'ext': 'mp4',
                'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
                'timestamp': 1358554297,
                'upload_date': '20130119',
                'uploader_id': '1648642',
                'uploader': 'wittyusernamehere',
                'release_timestamp': 1358554297,
                'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg',
                'release_date': '20130119',
                'uploader_url': 'https://i.imgur.com/u3R4I2S_d.png?maxwidth=290&fidelity=grand',
                'comment_count': int,
                'dislike_count': int,
                'like_count': int,
            },
        },
        {
            # TODO: static image - replace with animated/video gallery
            'url': 'http://imgur.com/topic/Funny/N8rOudd',
            'only_matching': True,
        },
        {
            'url': 'http://imgur.com/r/aww/VQcQPhM',
            'add_ies': ['Imgur'],
            'info_dict': {
                'id': 'VQcQPhM',
                'ext': 'mp4',
                'title': 'The boss is here',
                'timestamp': 1476494751,
                'upload_date': '20161015',
                'uploader_id': '19138530',
                'uploader': 'thematrixcam',
                'comment_count': int,
                'dislike_count': int,
                'uploader_url': 'https://i.imgur.com/qCjr5Pi_d.png?maxwidth=290&fidelity=grand',
                'release_timestamp': 1476494751,
                'like_count': int,
                'release_date': '20161015',
                'thumbnail': 'https://i.imgur.com/VQcQPhMh.jpg',
            },
        },
        # from https://github.com/ytdl-org/youtube-dl/pull/16674
        {
            'url': 'https://imgur.com/t/unmuted/6lAn9VQ',
            'info_dict': {
                'id': '6lAn9VQ',
                'title': 'Penguins !',
            },
            'playlist_count': 3,
        },
        {
            # The expected id differs from the id in the URL
            'url': 'https://imgur.com/t/unmuted/kx2uD3C',
            'add_ies': ['Imgur'],
            'info_dict': {
                'id': 'ZVMv45i',
                'ext': 'mp4',
                'title': 'Intruder',
                'timestamp': 1528129683,
                'upload_date': '20180604',
                'release_timestamp': 1528129683,
                'release_date': '20180604',
                'like_count': int,
                'dislike_count': int,
                'comment_count': int,
                'duration': 30.03,
                'thumbnail': 'https://i.imgur.com/ZVMv45ih.jpg',
            },
        },
        {
            # The expected id differs from the id in the URL
            'url': 'https://imgur.com/t/unmuted/wXSK0YH',
            'add_ies': ['Imgur'],
            'info_dict': {
                'id': 'JCAP4io',
                'ext': 'mp4',
                'title': 're:I got the blues$',
                'description': 'Luka’s vocal stylings.\n\nFP edit: don’t encourage me. I’ll never stop posting Luka and friends.',
                'timestamp': 1527809525,
                'upload_date': '20180531',
                'like_count': int,
                'dislike_count': int,
                'duration': 30.03,
                'comment_count': int,
                'release_timestamp': 1527809525,
                'thumbnail': 'https://i.imgur.com/JCAP4ioh.jpg',
                'release_date': '20180531',
            },
        },
    ]
8875b3d5 | 341 | |
5f47a60c | 342 | |
class ImgurAlbumIE(ImgurGalleryBaseIE):
    """Extractor for `/a/<id>` album URLs."""

    IE_NAME = 'imgur:album'
    _VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
    # Disables the base class's single-item shortcut, so albums are always
    # returned as playlists
    _GALLERY = False

    _TESTS = [
        {
            # TODO: only static images - replace with animated/video gallery
            'url': 'http://imgur.com/a/j6Orj',
            'only_matching': True,
        },
        # from https://github.com/ytdl-org/youtube-dl/pull/21693
        {
            'url': 'https://imgur.com/a/iX265HX',
            'info_dict': {
                'id': 'iX265HX',
                'title': 'enen-no-shouboutai',
            },
            'playlist_count': 2,
        },
        {
            'url': 'https://imgur.com/a/8pih2Ed',
            'info_dict': {
                'id': '8pih2Ed',
            },
            'playlist_mincount': 1,
        },
    ]