jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/gfycat.py
[fc2] Fix extraction (#2572)
[yt-dlp.git] / yt_dlp / extractor / gfycat.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8 int_or_none,
9 float_or_none,
10 qualities,
11 ExtractorError,
12 )
13
14
class GfycatIE(InfoExtractor):
    """Extractor for gfycat.com pages, embeds and direct media links.

    Metadata is read from ``https://api.gfycat.com/v1/gfycats/<id>``; one
    format is produced for each of the ``gifUrl``/``webmUrl``/``mp4Url``
    fields present in the response.
    """

    # The leading ``(?i)`` makes the whole URL pattern case-insensitive
    # (see the ``/IFR/`` test below). The id stops at the first ``-``, ``/``,
    # ``?``, ``#``, ``.`` or quote, which drops slug suffixes and file
    # extensions such as ``-size_restricted.gif``.
    _VALID_URL = r'(?i)https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P<id>[^-/?#\."\']+)'
    _TESTS = [{
        'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
        'info_dict': {
            'id': 'DeadlyDecisiveGermanpinscher',
            'ext': 'mp4',
            'title': 'Ghost in the Shell',
            'timestamp': 1410656006,
            'upload_date': '20140914',
            'uploader': 'anonymous',
            'duration': 10.4,
            'view_count': int,
            'like_count': int,
            'categories': list,
            'age_limit': 0,
            'uploader_id': 'anonymous',
            'description': '',
        }
    }, {
        'url': 'http://gfycat.com/ifr/JauntyTimelyAmazontreeboa',
        'info_dict': {
            'id': 'JauntyTimelyAmazontreeboa',
            'ext': 'mp4',
            'title': 'JauntyTimelyAmazontreeboa',
            'timestamp': 1411720126,
            'upload_date': '20140926',
            'uploader': 'anonymous',
            'duration': 3.52,
            'view_count': int,
            'like_count': int,
            'categories': list,
            'age_limit': 0,
            'uploader_id': 'anonymous',
            'description': '',
        }
    }, {
        'url': 'https://gfycat.com/alienatedsolidgreathornedowl',
        'info_dict': {
            'id': 'alienatedsolidgreathornedowl',
            'ext': 'mp4',
            'upload_date': '20211226',
            'uploader_id': 'reactions',
            'timestamp': 1640536930,
            'like_count': int,
            'description': '',
            'title': 'Ingrid Michaelson, Zooey Deschanel - Merry Christmas Happy New Year',
            'categories': list,
            'age_limit': 0,
            'duration': 2.9583333333333335,
            'uploader': 'Reaction GIFs',
            'view_count': int,
        }
    }, {
        'url': 'https://gfycat.com/ru/RemarkableDrearyAmurstarfish',
        'only_matching': True
    }, {
        'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull',
        'only_matching': True
    }, {
        'url': 'https://gfycat.com/acceptablehappygoluckyharborporpoise-baseball',
        'only_matching': True
    }, {
        'url': 'https://thumbs.gfycat.com/acceptablehappygoluckyharborporpoise-size_restricted.gif',
        'only_matching': True
    }, {
        'url': 'https://giant.gfycat.com/acceptablehappygoluckyharborporpoise.mp4',
        'only_matching': True
    }, {
        'url': 'http://gfycat.com/IFR/JauntyTimelyAmazontreeboa',
        'only_matching': True
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return every Gfycat URL used as an ``<iframe>``/``<source>`` ``src``.

        :param webpage: HTML text to scan.
        :returns: list of matched URL strings, in document order.
        """
        # ``_VALID_URL`` starts with the global inline flag ``(?i)``. Pasting
        # it into the middle of a larger pattern leaves that flag away from
        # the start of the expression, which is deprecated since Python 3.6
        # and raises ``re.error`` on Python 3.11+. Move the flag to the front
        # of the combined pattern instead; a global flag always applied to
        # the whole expression, so the matching behaviour is unchanged.
        inline_flag = '(?i)'
        url_re = GfycatIE._VALID_URL
        flag_prefix = ''
        if url_re.startswith(inline_flag):
            url_re = url_re[len(inline_flag):]
            flag_prefix = inline_flag
        embed_re = flag_prefix + r'<(?:iframe|source)[^>]+\bsrc=["\'](?P<url>%s)' % url_re
        return [mobj.group('url') for mobj in re.finditer(embed_re, webpage)]

    def _real_extract(self, url):
        """Build the info dict for a single Gfycat URL.

        :param url: a URL matching ``_VALID_URL``.
        :returns: dict with the id, title, metadata fields and ``formats``.
        :raises ExtractorError: when the API response contains an ``error``
            key (raised with ``expected=True``).
        """
        video_id = self._match_id(url)

        gfy = self._download_json(
            'https://api.gfycat.com/v1/gfycats/%s' % video_id,
            video_id, 'Downloading video info')
        if 'error' in gfy:
            raise ExtractorError('Gfycat said: ' + gfy['error'], expected=True)
        gfy = gfy['gfyItem']

        # Fall back to the item's name when it has no (or an empty) title.
        title = gfy.get('title') or gfy['gfyName']
        description = gfy.get('description')
        timestamp = int_or_none(gfy.get('createDate'))
        # Both key spellings are accepted for the uploader name.
        uploader = gfy.get('userName') or gfy.get('username')
        view_count = int_or_none(gfy.get('views'))
        like_count = int_or_none(gfy.get('likes'))
        dislike_count = int_or_none(gfy.get('dislikes'))
        # Only the string '1' is treated as NSFW here.
        age_limit = 18 if gfy.get('nsfw') == '1' else 0

        width = int_or_none(gfy.get('width'))
        height = int_or_none(gfy.get('height'))
        fps = int_or_none(gfy.get('frameRate'))
        num_frames = int_or_none(gfy.get('numFrames'))

        # Duration is derived as frame count over frame rate; skipped when
        # either value is missing or zero.
        duration = float_or_none(num_frames, fps) if num_frames and fps else None

        categories = gfy.get('tags') or gfy.get('extraLemmas') or []

        # NOTE(review): presumably ``qualities`` ranks ids by their position
        # in this tuple (later = preferred) - confirm against ..utils.
        FORMATS = ('gif', 'webm', 'mp4')
        quality = qualities(FORMATS)

        formats = []
        for format_id in FORMATS:
            # Each container has its own '<fmt>Url' / '<fmt>Size' fields.
            video_url = gfy.get('%sUrl' % format_id)
            if not video_url:
                continue
            filesize = int_or_none(gfy.get('%sSize' % format_id))
            formats.append({
                'url': video_url,
                'format_id': format_id,
                'width': width,
                'height': height,
                'fps': fps,
                'filesize': filesize,
                'quality': quality(format_id),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            # Prefer the display name; the account name doubles as the id.
            'uploader': gfy.get('userDisplayName') or uploader,
            'uploader_id': uploader,
            'duration': duration,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'categories': categories,
            'age_limit': age_limit,
            'formats': formats,
        }