]>
Commit | Line | Data |
---|---|---|
1 | from .common import InfoExtractor | |
2 | from ..utils import ( | |
3 | ExtractorError, | |
4 | determine_ext, | |
5 | int_or_none, | |
6 | traverse_obj, | |
7 | unescapeHTML, | |
8 | url_or_none, | |
9 | ) | |
10 | ||
11 | ||
class NineGagIE(InfoExtractor):
    """Extractor for animated (video) posts on 9gag.com."""

    IE_NAME = '9gag'
    IE_DESC = '9GAG'
    _VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'

    _TESTS = [{
        'url': 'https://9gag.com/gag/ae5Ag7B',
        'info_dict': {
            'id': 'ae5Ag7B',
            'ext': 'webm',
            'title': 'Capybara Agility Training',
            'upload_date': '20191108',
            'timestamp': 1573237208,
            'thumbnail': 'https://img-9gag-fun.9cache.com/photo/ae5Ag7B_460s.jpg',
            'categories': ['Awesome'],
            'tags': ['Awesome'],
            'duration': 44,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
        },
    }, {
        # HTML escaped title
        'url': 'https://9gag.com/gag/av5nvyb',
        'only_matching': True,
    }, {
        # Non Anonymous Uploader
        'url': 'https://9gag.com/gag/ajgp66G',
        'info_dict': {
            'id': 'ajgp66G',
            'ext': 'webm',
            'title': 'Master Shifu! Or Splinter! You decide:',
            'upload_date': '20220806',
            'timestamp': 1659803411,
            'thumbnail': 'https://img-9gag-fun.9cache.com/photo/ajgp66G_460s.jpg',
            'categories': ['Funny'],
            'tags': ['Funny'],
            'duration': 26,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'uploader': 'Peter Klaus',
            'uploader_id': 'peterklaus12',
            'uploader_url': 'https://9gag.com/u/peterklaus12',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for a single 9GAG post.

        The post id is taken from ``url`` and the post JSON is fetched from
        ``https://9gag.com/v1/post``.  Every entry of the post's ``images``
        mapping is sorted by file extension into either a thumbnail
        (``jpg``/``png``) or a format (``webm``/``mp4``); entries with any
        other extension are ignored.

        Raises:
            ExtractorError: (expected) if the post's ``type`` is not
                ``'Animated'``.
        """
        post_id = self._match_id(url)
        post = self._download_json(
            'https://9gag.com/v1/post', post_id, query={
                'id': post_id,
            })['data']['post']

        if post.get('type') != 'Animated':
            raise ExtractorError(
                'The given url does not contain a video',
                expected=True)

        image_prefix = 'image'
        duration = None
        formats = []
        thumbnails = []
        for key, image in (post.get('images') or {}).items():
            # Skip malformed entries instead of failing on ``.get``.
            if not isinstance(image, dict):
                continue
            image_url = url_or_none(image.get('url'))
            if not image_url:
                continue
            ext = determine_ext(image_url)
            # Remove only the leading 'image' prefix.  ``str.strip('image')``
            # would treat the argument as a character set and also eat any of
            # the letters i/m/a/g/e from both ends of the remainder (e.g. a
            # key like 'image460svwm' would lose its trailing 'm').
            image_id = key[len(image_prefix):] if key.startswith(image_prefix) else key
            common = {
                'url': image_url,
                'width': int_or_none(image.get('width')),
                'height': int_or_none(image.get('height')),
            }
            if ext in ('jpg', 'png'):
                # The WebP variant (if any) is listed before the original.
                webp_url = image.get('webpUrl')
                if webp_url:
                    thumbnails.append({
                        **common,
                        'id': image_id + '-webp',
                        'url': webp_url,
                    })
                thumbnails.append({
                    **common,
                    'id': image_id,
                    'ext': ext,
                })
            elif ext in ('webm', 'mp4'):
                # Keep the first non-zero duration reported by any video entry.
                if not duration:
                    duration = int_or_none(image.get('duration'))
                # Only an explicit ``hasAudio == 0`` marks the stream as silent.
                common['acodec'] = 'none' if image.get('hasAudio') == 0 else None
                # Codec-specific alternates are exposed via '<codec>Url' keys
                # and are listed before the entry's main URL.
                for vcodec in ('vp8', 'vp9', 'h265'):
                    c_url = image.get(vcodec + 'Url')
                    if not c_url:
                        continue
                    formats.append({
                        **common,
                        'format_id': image_id + '-' + vcodec,
                        'url': c_url,
                        'vcodec': vcodec,
                    })
                formats.append({
                    **common,
                    'ext': ext,
                    'format_id': image_id,
                })

        section = traverse_obj(post, ('postSection', 'name'))

        # ``tags`` stays None when the post has no tags at all; otherwise it is
        # the list of non-empty tag keys (which may itself be empty).
        post_tags = post.get('tags')
        tags = None
        if post_tags:
            tags = [
                tag['key'] for tag in post_tags
                if isinstance(tag, dict) and tag.get('key')
            ]

        return {
            'id': post_id,
            'title': unescapeHTML(post.get('title')),
            'timestamp': int_or_none(post.get('creationTs')),
            'duration': duration,
            'uploader': traverse_obj(post, ('creator', 'fullName')),
            'uploader_id': traverse_obj(post, ('creator', 'username')),
            'uploader_url': url_or_none(traverse_obj(post, ('creator', 'profileUrl'))),
            'formats': formats,
            'thumbnails': thumbnails,
            'like_count': int_or_none(post.get('upVoteCount')),
            'dislike_count': int_or_none(post.get('downVoteCount')),
            'comment_count': int_or_none(post.get('commentsCount')),
            'age_limit': 18 if post.get('nsfw') == 1 else None,
            'categories': [section] if section else None,
            'tags': tags,
        }