jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/gfycat.py
[extractor] Support multiple archive ids for one video (#4307)
[yt-dlp.git] / yt_dlp / extractor / gfycat.py
1 import re
2
3 from .common import InfoExtractor
4 from ..utils import (
5 int_or_none,
6 float_or_none,
7 qualities,
8 ExtractorError,
9 )
10
11
class GfycatIE(InfoExtractor):
    """Extractor for single Gfycat clips.

    Handles gfycat.com page URLs (optionally under ``ru/``, ``ifr/`` or
    ``gifs/detail/``, matched case-insensitively) as well as URLs on the
    ``www.``, ``giant.`` and ``thumbs.`` subdomains.  Metadata is read from
    the ``api.gfycat.com/v1/gfycats/<id>`` JSON endpoint.
    """

    # The id ends at the first '-', '/', '?', '#', '.' or quote character, so
    # slug suffixes ("-baseball"), file extensions (".mp4") and any markup
    # following an embedded URL never become part of the id.
    _VALID_URL = r'https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?i:ru/|ifr/|gifs/detail/)?(?P<id>[^-/?#\."\']+)'
    _TESTS = [{
        'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
        'info_dict': {
            'id': 'DeadlyDecisiveGermanpinscher',
            'ext': 'mp4',
            'title': 'Ghost in the Shell',
            'timestamp': 1410656006,
            'upload_date': '20140914',
            'uploader': 'anonymous',
            'duration': 10.4,
            'view_count': int,
            'like_count': int,
            'categories': list,
            'age_limit': 0,
            'uploader_id': 'anonymous',
            'description': '',
        }
    }, {
        'url': 'http://gfycat.com/ifr/JauntyTimelyAmazontreeboa',
        'info_dict': {
            'id': 'JauntyTimelyAmazontreeboa',
            'ext': 'mp4',
            'title': 'JauntyTimelyAmazontreeboa',
            'timestamp': 1411720126,
            'upload_date': '20140926',
            'uploader': 'anonymous',
            'duration': 3.52,
            'view_count': int,
            'like_count': int,
            'categories': list,
            'age_limit': 0,
            'uploader_id': 'anonymous',
            'description': '',
        }
    }, {
        'url': 'https://gfycat.com/alienatedsolidgreathornedowl',
        'info_dict': {
            'id': 'alienatedsolidgreathornedowl',
            'ext': 'mp4',
            'upload_date': '20211226',
            'uploader_id': 'reactions',
            'timestamp': 1640536930,
            'like_count': int,
            'description': '',
            'title': 'Ingrid Michaelson, Zooey Deschanel - Merry Christmas Happy New Year',
            'categories': list,
            'age_limit': 0,
            'duration': 2.9583333333333335,
            'uploader': 'Reaction GIFs',
            'view_count': int,
        }
    }, {
        'url': 'https://gfycat.com/ru/RemarkableDrearyAmurstarfish',
        'only_matching': True
    }, {
        'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull',
        'only_matching': True
    }, {
        'url': 'https://gfycat.com/acceptablehappygoluckyharborporpoise-baseball',
        'only_matching': True
    }, {
        'url': 'https://thumbs.gfycat.com/acceptablehappygoluckyharborporpoise-size_restricted.gif',
        'only_matching': True
    }, {
        'url': 'https://giant.gfycat.com/acceptablehappygoluckyharborporpoise.mp4',
        'only_matching': True
    }, {
        'url': 'http://gfycat.com/IFR/JauntyTimelyAmazontreeboa',
        'only_matching': True
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the Gfycat URLs used as ``src`` of <iframe>/<source> tags.

        Each returned URL is the part of the attribute value matched by
        ``_VALID_URL``, i.e. it ends where the clip id ends.  URLs are listed
        in the order they occur in *webpage*; the list is empty when nothing
        matches.
        """
        embed_re = r'<(?:iframe|source)[^>]+\bsrc=["\'](?P<url>%s)' % GfycatIE._VALID_URL
        return [mobj.group('url') for mobj in re.finditer(embed_re, webpage)]

    def _real_extract(self, url):
        """Download the clip's API record and convert it to an info dict.

        Raises:
            ExtractorError: when the API response contains an ``error`` entry.
            KeyError: when the response has no ``gfyItem``, or the item has
                neither a non-empty ``title`` nor a ``gfyName``.
        """
        video_id = self._match_id(url)

        gfy = self._download_json(
            'https://api.gfycat.com/v1/gfycats/%s' % video_id,
            video_id, 'Downloading video info')
        if 'error' in gfy:
            # %-format with a one-element tuple instead of ``+``: the message
            # is identical for string errors, and a non-string payload is
            # still reported as an ExtractorError instead of a TypeError.
            raise ExtractorError(
                'Gfycat said: %s' % (gfy['error'],), expected=True)
        gfy = gfy['gfyItem']

        title = gfy.get('title') or gfy['gfyName']
        description = gfy.get('description')
        timestamp = int_or_none(gfy.get('createDate'))
        # The account name is read from either spelling of the key.
        uploader = gfy.get('userName') or gfy.get('username')
        view_count = int_or_none(gfy.get('views'))
        like_count = int_or_none(gfy.get('likes'))
        dislike_count = int_or_none(gfy.get('dislikes'))
        # Only the exact string '1' marks a clip as 18+; anything else is 0.
        age_limit = 18 if gfy.get('nsfw') == '1' else 0

        width = int_or_none(gfy.get('width'))
        height = int_or_none(gfy.get('height'))
        # NOTE(review): frameRate goes through int_or_none, so a fractional
        # rate would presumably not survive intact before the duration is
        # derived from it - confirm against real API data.
        fps = int_or_none(gfy.get('frameRate'))
        num_frames = int_or_none(gfy.get('numFrames'))

        # Duration is derived from frame count and frame rate; it is left
        # unset when either value is missing or zero.
        duration = float_or_none(num_frames, fps) if num_frames and fps else None

        categories = gfy.get('tags') or gfy.get('extraLemmas') or []

        # Order is significant: this tuple is what qualities() derives the
        # per-format ranking from.
        FORMATS = ('gif', 'webm', 'mp4')
        quality = qualities(FORMATS)

        formats = []
        for format_id in FORMATS:
            video_url = gfy.get('%sUrl' % format_id)
            if not video_url:
                # The API record has no URL for this container; skip it.
                continue
            formats.append({
                'url': video_url,
                'format_id': format_id,
                # Dimensions and frame rate are reported once per clip, so
                # every container shares them.
                'width': width,
                'height': height,
                'fps': fps,
                'filesize': int_or_none(gfy.get('%sSize' % format_id)),
                'quality': quality(format_id),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            # Prefer the display name for 'uploader'; the account name is
            # the fallback and always serves as 'uploader_id'.
            'uploader': gfy.get('userDisplayName') or uploader,
            'uploader_id': uploader,
            'duration': duration,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'categories': categories,
            'age_limit': age_limit,
            'formats': formats,
        }