]>
Commit | Line | Data |
---|---|---|
ed850070 JMF |
1 | from __future__ import unicode_literals |
2 | ||
7fc3fa05 | 3 | from .common import InfoExtractor |
a820dc72 | 4 | from ..utils import ( |
a820dc72 | 5 | ExtractorError, |
bc2ca1bb | 6 | determine_ext, |
a820dc72 RA |
7 | int_or_none, |
8 | try_get, | |
bc2ca1bb | 9 | unescapeHTML, |
a820dc72 RA |
10 | url_or_none, |
11 | ) | |
7fc3fa05 PH |
12 | |
13 | ||
class NineGagIE(InfoExtractor):
    """Extractor for animated (video) posts hosted on 9gag.com.

    Post metadata is read from the ``https://9gag.com/v1/post`` JSON
    endpoint; every entry of the post's ``images`` mapping is turned into
    either a thumbnail (jpg/png) or a format (webm/mp4).
    """

    IE_NAME = '9gag'
    _VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'

    _TESTS = [{
        'url': 'https://9gag.com/gag/ae5Ag7B',
        'info_dict': {
            'id': 'ae5Ag7B',
            'ext': 'mp4',
            'title': 'Capybara Agility Training',
            'upload_date': '20191108',
            'timestamp': 1573237208,
            'categories': ['Awesome'],
            'tags': ['Weimaraner', 'American Pit Bull Terrier'],
            'duration': 44,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
        }
    }, {
        # HTML escaped title
        'url': 'https://9gag.com/gag/av5nvyb',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the 9gag post addressed by *url*.

        Raises ExtractorError (expected) when the post's ``type`` is not
        ``'Animated'``, i.e. the post carries no video.
        """
        post_id = self._match_id(url)
        post = self._download_json(
            'https://9gag.com/v1/post', post_id, query={
                'id': post_id
            })['data']['post']

        if post.get('type') != 'Animated':
            raise ExtractorError(
                'The given url does not contain a video',
                expected=True)

        # Titles may arrive HTML-escaped (see the second entry in _TESTS).
        title = unescapeHTML(post['title'])

        image_prefix = 'image'
        duration = None
        formats = []
        thumbnails = []
        for key, image in (post.get('images') or {}).items():
            # Skip malformed entries instead of failing on `.get` below.
            if not isinstance(image, dict):
                continue
            image_url = url_or_none(image.get('url'))
            if not image_url:
                continue
            ext = determine_ext(image_url)
            # Remove only the leading 'image' prefix. str.strip('image')
            # treats its argument as a *set of characters* and would also
            # eat trailing i/m/a/g/e letters (e.g. 'image460svwm' would
            # become '460svw').
            if key.startswith(image_prefix):
                image_id = key[len(image_prefix):]
            else:
                image_id = key
            common = {
                'url': image_url,
                'width': int_or_none(image.get('width')),
                'height': int_or_none(image.get('height')),
            }
            if ext in ('jpg', 'png'):
                # An optional WebP rendition becomes its own thumbnail with
                # the same dimensions.
                webp_url = image.get('webpUrl')
                if webp_url:
                    webp_thumbnail = common.copy()
                    webp_thumbnail.update({
                        'id': image_id + '-webp',
                        'url': webp_url,
                    })
                    thumbnails.append(webp_thumbnail)
                common.update({
                    'id': image_id,
                    'ext': ext,
                })
                thumbnails.append(common)
            elif ext in ('webm', 'mp4'):
                # Keep the first non-empty duration found among the videos.
                if not duration:
                    duration = int_or_none(image.get('duration'))
                # Only an explicit hasAudio == 0 marks the format as
                # audio-less; otherwise the codec is left unknown.
                common['acodec'] = 'none' if image.get('hasAudio') == 0 else None
                # Alternative encodings are exposed as '<codec>Url' keys.
                for vcodec in ('vp8', 'vp9', 'h265'):
                    codec_url = image.get(vcodec + 'Url')
                    if not codec_url:
                        continue
                    codec_format = common.copy()
                    codec_format.update({
                        'format_id': image_id + '-' + vcodec,
                        'url': codec_url,
                        'vcodec': vcodec,
                    })
                    formats.append(codec_format)
                common.update({
                    'ext': ext,
                    'format_id': image_id,
                })
                formats.append(common)
        self._sort_formats(formats)

        section = try_get(post, lambda x: x['postSection']['name'])

        # None when the post has no tags at all; otherwise the list of tag
        # keys, skipping entries that are malformed or have an empty key.
        post_tags = post.get('tags')
        tags = [
            tag['key'] for tag in post_tags
            if isinstance(tag, dict) and tag.get('key')
        ] if post_tags else None

        def get_count(kind):
            # Counters are stored under '<kind>Count' keys of the post.
            return int_or_none(post.get(kind + 'Count'))

        return {
            'id': post_id,
            'title': title,
            'timestamp': int_or_none(post.get('creationTs')),
            'duration': duration,
            'formats': formats,
            'thumbnails': thumbnails,
            'like_count': get_count('upVote'),
            'dislike_count': get_count('downVote'),
            'comment_count': get_count('comments'),
            'age_limit': 18 if post.get('nsfw') == 1 else None,
            'categories': [section] if section else None,
            'tags': tags,
        }