]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/ninegag.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / ninegag.py
CommitLineData
7fc3fa05 1from .common import InfoExtractor
a820dc72 2from ..utils import (
a820dc72 3 ExtractorError,
bc2ca1bb 4 determine_ext,
a820dc72 5 int_or_none,
298d9c0e 6 traverse_obj,
bc2ca1bb 7 unescapeHTML,
a820dc72
RA
8 url_or_none,
9)
7fc3fa05
PH
10
11
class NineGagIE(InfoExtractor):
    # Extractor for single 9GAG posts (https://9gag.com/gag/<id>).
    # Only posts whose API ``type`` is 'Animated' are treated as videos.
    IE_NAME = '9gag'
    IE_DESC = '9GAG'
    _VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'

    _TESTS = [{
        'url': 'https://9gag.com/gag/ae5Ag7B',
        'info_dict': {
            'id': 'ae5Ag7B',
            'ext': 'webm',
            'title': 'Capybara Agility Training',
            'upload_date': '20191108',
            'timestamp': 1573237208,
            'thumbnail': 'https://img-9gag-fun.9cache.com/photo/ae5Ag7B_460s.jpg',
            'categories': ['Awesome'],
            'tags': ['Awesome'],
            'duration': 44,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
        }
    }, {
        # HTML escaped title
        'url': 'https://9gag.com/gag/av5nvyb',
        'only_matching': True,
    }, {
        # Non Anonymous Uploader
        'url': 'https://9gag.com/gag/ajgp66G',
        'info_dict': {
            'id': 'ajgp66G',
            'ext': 'webm',
            'title': 'Master Shifu! Or Splinter! You decide:',
            'upload_date': '20220806',
            'timestamp': 1659803411,
            'thumbnail': 'https://img-9gag-fun.9cache.com/photo/ajgp66G_460s.jpg',
            'categories': ['Funny'],
            'tags': ['Funny'],
            'duration': 26,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'uploader': 'Peter Klaus',
            'uploader_id': 'peterklaus12',
            'uploader_url': 'https://9gag.com/u/peterklaus12',
        }
    }]

    def _real_extract(self, url):
        """Fetch the post JSON for ``url`` and build the info dict.

        Queries ``https://9gag.com/v1/post`` with the post id taken from the
        URL, then sorts every entry of the post's ``images`` mapping into
        either a thumbnail (jpg/png) or a format (webm/mp4) by URL extension.

        Raises:
            ExtractorError: (expected) if the post type is not 'Animated'.
        """
        post_id = self._match_id(url)
        post = self._download_json(
            'https://9gag.com/v1/post', post_id, query={
                'id': post_id
            })['data']['post']

        if post.get('type') != 'Animated':
            raise ExtractorError(
                'The given url does not contain a video',
                expected=True)

        duration = None
        formats = []
        thumbnails = []
        for key, image in (post.get('images') or {}).items():
            image_url = url_or_none(image.get('url'))
            if not image_url:
                continue
            ext = determine_ext(image_url)
            # Drop only the literal 'image' prefix. str.strip('image') would
            # treat the argument as a character set and also eat trailing
            # i/m/a/g/e characters (e.g. 'image460svwm' -> '460svw').
            image_id = key[len('image'):] if key.startswith('image') else key
            common = {
                'url': image_url,
                'width': int_or_none(image.get('width')),
                'height': int_or_none(image.get('height')),
            }
            if ext in ('jpg', 'png'):
                # An optional WebP rendition is listed as its own thumbnail,
                # sharing the dimensions of the base image.
                webp_url = image.get('webpUrl')
                if webp_url:
                    t = common.copy()
                    t.update({
                        'id': image_id + '-webp',
                        'url': webp_url,
                    })
                    thumbnails.append(t)
                common.update({
                    'id': image_id,
                    'ext': ext,
                })
                thumbnails.append(common)
            elif ext in ('webm', 'mp4'):
                # Take the duration from the first video entry that has one.
                if not duration:
                    duration = int_or_none(image.get('duration'))
                # Only an explicit hasAudio == 0 marks the stream as silent;
                # otherwise the audio codec is left unknown.
                common['acodec'] = 'none' if image.get('hasAudio') == 0 else None
                # Per-codec alternative URLs ('vp8Url', 'vp9Url', 'h265Url')
                # become additional formats next to the base one.
                for vcodec in ('vp8', 'vp9', 'h265'):
                    c_url = image.get(vcodec + 'Url')
                    if not c_url:
                        continue
                    c_f = common.copy()
                    c_f.update({
                        'format_id': image_id + '-' + vcodec,
                        'url': c_url,
                        'vcodec': vcodec,
                    })
                    formats.append(c_f)
                common.update({
                    'ext': ext,
                    'format_id': image_id,
                })
                formats.append(common)

        section = traverse_obj(post, ('postSection', 'name'))

        # None when the post carries no tags at all; otherwise the list of
        # non-empty tag keys (possibly empty).
        post_tags = post.get('tags')
        tags = [tag['key'] for tag in post_tags if tag.get('key')] if post_tags else None

        return {
            'id': post_id,
            'title': unescapeHTML(post.get('title')),
            'timestamp': int_or_none(post.get('creationTs')),
            'duration': duration,
            'uploader': traverse_obj(post, ('creator', 'fullName')),
            'uploader_id': traverse_obj(post, ('creator', 'username')),
            'uploader_url': url_or_none(traverse_obj(post, ('creator', 'profileUrl'))),
            'formats': formats,
            'thumbnails': thumbnails,
            'like_count': int_or_none(post.get('upVoteCount')),
            'dislike_count': int_or_none(post.get('downVoteCount')),
            'comment_count': int_or_none(post.get('commentsCount')),
            'age_limit': 18 if post.get('nsfw') == 1 else None,
            'categories': [section] if section else None,
            'tags': tags,
        }