]>
Commit | Line | Data |
---|---|---|
1 | import itertools | |
2 | from urllib.error import HTTPError | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from .vimeo import VimeoIE | |
6 | ||
7 | from ..compat import compat_urllib_parse_unquote | |
8 | from ..utils import ( | |
9 | clean_html, | |
10 | determine_ext, | |
11 | ExtractorError, | |
12 | int_or_none, | |
13 | KNOWN_EXTENSIONS, | |
14 | mimetype2ext, | |
15 | parse_iso8601, | |
16 | str_or_none, | |
17 | traverse_obj, | |
18 | try_get, | |
19 | url_or_none, | |
20 | ) | |
21 | ||
22 | ||
class PatreonBaseIE(InfoExtractor):
    """Common base for the Patreon extractors; provides the API request helper."""

    # User agent sent with API requests unless the caller supplies its own
    USER_AGENT = 'Patreon/7.6.28 (Android; Android 11; Scale/2.10)'

    def _call_api(self, ep, item_id, query=None, headers=None, fatal=True, note=None):
        """Download JSON from ``https://www.patreon.com/api/{ep}``.

        @param ep       API endpoint path appended to the API base URL
        @param item_id  ID used for progress and error reporting
        @param query    optional dict of query parameters; when non-empty,
                        'json-api-version' is added to the parameters sent
        @param headers  optional dict of request headers; 'User-Agent'
                        defaults to USER_AGENT when not supplied
        @param fatal    passed through to _download_json
        @param note     progress note; defaults to 'Downloading API JSON'
        @returns        whatever _download_json returns
        @raises ExtractorError  with the API's own error detail when the
                        failing HTTP response carries a JSON error body;
                        otherwise the original error is re-raised
        """
        # Work on copies so the dicts passed in by the caller are never modified
        headers = dict(headers or {})
        headers.setdefault('User-Agent', self.USER_AGENT)
        if query:
            query = {**query, 'json-api-version': 1.0}

        try:
            return self._download_json(
                f'https://www.patreon.com/api/{ep}',
                item_id, note=note or 'Downloading API JSON',
                query=query, fatal=fatal, headers=headers)
        except ExtractorError as e:
            # Only HTTP errors with a JSON body can carry an API error message
            if not isinstance(e.cause, HTTPError) or mimetype2ext(e.cause.headers.get('Content-Type')) != 'json':
                raise
            err_json = self._parse_json(self._webpage_read_content(e.cause, None, item_id), item_id, fatal=False)
            err_message = traverse_obj(err_json, ('errors', ..., 'detail'), get_all=False)
            if err_message:
                raise ExtractorError(f'Patreon said: {err_message}', expected=True)
            raise
47 | ||
48 | ||
class PatreonIE(PatreonBaseIE):
    """Extractor for individual Patreon posts (``/posts/...`` and ``/creation?hid=...``)."""

    _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.patreon.com/creation?hid=743933',
        'md5': 'e25505eec1053a6e6813b8ed369875cc',
        'info_dict': {
            'id': '743933',
            'ext': 'mp3',
            'title': 'Episode 166: David Smalley of Dogma Debate',
            'description': 'md5:34d207dd29aa90e24f1b3f58841b81c7',
            'uploader': 'Cognitive Dissonance Podcast',
            'thumbnail': 're:^https?://.*$',
            'timestamp': 1406473987,
            'upload_date': '20140727',
            'uploader_id': '87145',
            'like_count': int,
            'comment_count': int,
            'uploader_url': 'https://www.patreon.com/dissonancepod',
            'channel_id': '80642',
            'channel_url': 'https://www.patreon.com/dissonancepod',
            'channel_follower_count': int,
        },
    }, {
        'url': 'http://www.patreon.com/creation?hid=754133',
        'md5': '3eb09345bf44bf60451b8b0b81759d0a',
        'info_dict': {
            'id': '754133',
            'ext': 'mp3',
            'title': 'CD 167 Extra',
            'uploader': 'Cognitive Dissonance Podcast',
            'thumbnail': 're:^https?://.*$',
            'like_count': int,
            'comment_count': int,
            'uploader_url': 'https://www.patreon.com/dissonancepod',
        },
        'skip': 'Patron-only content',
    }, {
        'url': 'https://www.patreon.com/creation?hid=1682498',
        'info_dict': {
            'id': 'SU4fj_aEMVw',
            'ext': 'mp4',
            'title': 'I\'m on Patreon!',
            'uploader': 'TraciJHines',
            'thumbnail': 're:^https?://.*$',
            'upload_date': '20150211',
            'description': 'md5:8af6425f50bd46fbf29f3db0fc3a8364',
            'uploader_id': 'TraciJHines',
            'categories': ['Entertainment'],
            'duration': 282,
            'view_count': int,
            'tags': 'count:39',
            'age_limit': 0,
            'channel': 'TraciJHines',
            'channel_url': 'https://www.youtube.com/channel/UCGLim4T2loE5rwCMdpCIPVg',
            'live_status': 'not_live',
            'like_count': int,
            'channel_id': 'UCGLim4T2loE5rwCMdpCIPVg',
            'availability': 'public',
            'channel_follower_count': int,
            'playable_in_embed': True,
            'uploader_url': 'http://www.youtube.com/user/TraciJHines',
            'comment_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        }
    }, {
        'url': 'https://www.patreon.com/posts/episode-166-of-743933',
        'only_matching': True,
    }, {
        'url': 'https://www.patreon.com/posts/743933',
        'only_matching': True,
    }, {
        'url': 'https://www.patreon.com/posts/kitchen-as-seen-51706779',
        'md5': '96656690071f6d64895866008484251b',
        'info_dict': {
            'id': '555089736',
            'ext': 'mp4',
            'title': 'KITCHEN AS SEEN ON DEEZ NUTS EXTENDED!',
            'uploader': 'Cold Ones',
            'thumbnail': 're:^https?://.*$',
            'upload_date': '20210526',
            'description': 'md5:557a409bd79d3898689419094934ba79',
            'uploader_id': '14936315',
        },
        'skip': 'Patron-only content'
    }, {
        # m3u8 video (https://github.com/yt-dlp/yt-dlp/issues/2277)
        'url': 'https://www.patreon.com/posts/video-sketchbook-32452882',
        'info_dict': {
            'id': '32452882',
            'ext': 'mp4',
            'comment_count': int,
            'uploader_id': '4301314',
            'like_count': int,
            'timestamp': 1576696962,
            'upload_date': '20191218',
            'thumbnail': r're:^https?://.*$',
            'uploader_url': 'https://www.patreon.com/loish',
            'description': 'md5:e2693e97ee299c8ece47ffdb67e7d9d2',
            'title': 'VIDEO // sketchbook flipthrough',
            'uploader': 'Loish ',
            'tags': ['sketchbook', 'video'],
            'channel_id': '1641751',
            'channel_url': 'https://www.patreon.com/loish',
            'channel_follower_count': int,
        }
    }, {
        # bad videos under media (if media is included). Real one is under post_file
        'url': 'https://www.patreon.com/posts/premium-access-70282931',
        'info_dict': {
            'id': '70282931',
            'ext': 'mp4',
            'title': '[Premium Access + Uncut] The Office - 2x6 The Fight - Group Reaction',
            'channel_url': 'https://www.patreon.com/thenormies',
            'channel_id': '573397',
            'uploader_id': '2929435',
            'uploader': 'The Normies',
            'description': 'md5:79c9fd8778e2cef84049a94c058a5e23',
            'comment_count': int,
            'upload_date': '20220809',
            'thumbnail': r're:^https?://.*$',
            'channel_follower_count': int,
            'like_count': int,
            'timestamp': 1660052820,
            'tags': ['The Office', 'early access', 'uncut'],
            'uploader_url': 'https://www.patreon.com/thenormies',
        },
        'skip': 'Patron-only content',
    }]

    def _real_extract(self, url):
        """Extract a single post.

        Sources are tried in this order: the first usable item of type
        'media' among the included resources, a Vimeo embed, any other embed
        URL, and finally the post's own 'post_file'. When none of them yields
        media, a "no formats" error is reported and the bare metadata dict is
        returned.
        """
        video_id = self._match_id(url)
        post = self._call_api(
            f'posts/{video_id}', video_id, query={
                'fields[media]': 'download_url,mimetype,size_bytes',
                'fields[post]': 'comment_count,content,embed,image,like_count,post_file,published_at,title,current_user_can_view',
                'fields[user]': 'full_name,url',
                'fields[post_tag]': 'value',
                'fields[campaign]': 'url,name,patron_count',
                'json-api-use-default-includes': 'false',
                'include': 'audio,user,user_defined_tags,campaign,attachments_media',
            })
        attributes = post['data']['attributes']
        # The title may be null or absent; do not crash on `.strip()` in that case
        title = attributes.get('title')
        title = title.strip() if isinstance(title, str) else None
        image = attributes.get('image') or {}
        info = {
            'id': video_id,
            'title': title,
            'description': clean_html(attributes.get('content')),
            'thumbnail': image.get('large_url') or image.get('url'),
            'timestamp': parse_iso8601(attributes.get('published_at')),
            'like_count': int_or_none(attributes.get('like_count')),
            'comment_count': int_or_none(attributes.get('comment_count')),
        }
        can_view_post = traverse_obj(attributes, 'current_user_can_view')
        if can_view_post and info['comment_count']:
            info['__post_extractor'] = self.extract_comments(video_id)

        # Walk through *all* included resources before returning, so that
        # uploader/channel/tag metadata is collected regardless of where the
        # media item sits in the list.
        media_info = None
        for i in post.get('included') or []:
            i_type = i.get('type')
            if i_type == 'media':
                if media_info is not None:
                    # XXX: what happens if there are multiple attachments?
                    # For now the first usable one wins.
                    continue
                media_attributes = i.get('attributes') or {}
                download_url = media_attributes.get('download_url')
                ext = mimetype2ext(media_attributes.get('mimetype'))

                # if size_bytes is None, this media file is likely unavailable
                # See: https://github.com/yt-dlp/yt-dlp/issues/4608
                size_bytes = int_or_none(media_attributes.get('size_bytes'))
                if download_url and ext in KNOWN_EXTENSIONS and size_bytes is not None:
                    media_info = {
                        'ext': ext,
                        'filesize': size_bytes,
                        'url': download_url,
                    }

            elif i_type == 'user':
                user_attributes = i.get('attributes')
                if user_attributes:
                    info.update({
                        'uploader': user_attributes.get('full_name'),
                        'uploader_id': str_or_none(i.get('id')),
                        'uploader_url': user_attributes.get('url'),
                    })

            elif i_type == 'post_tag':
                tag = traverse_obj(i, ('attributes', 'value'))
                if tag is not None:
                    info.setdefault('tags', []).append(tag)

            elif i_type == 'campaign':
                info.update({
                    # 'name' is the attribute requested via fields[campaign];
                    # 'title' is kept as a fallback for backward compatibility
                    'channel': traverse_obj(i, ('attributes', 'name'), ('attributes', 'title')),
                    'channel_id': str_or_none(i.get('id')),
                    'channel_url': traverse_obj(i, ('attributes', 'url')),
                    'channel_follower_count': int_or_none(traverse_obj(i, ('attributes', 'patron_count'))),
                })

        if media_info:
            return {**info, **media_info}

        # handle Vimeo embeds
        if try_get(attributes, lambda x: x['embed']['provider']) == 'Vimeo':
            embed_html = try_get(attributes, lambda x: x['embed']['html'], str)
            vimeo_match = self._search_regex(
                r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)',
                embed_html or '', 'vimeo url', fatal=False)
            # The non-fatal search yields None on a miss; unquoting None would raise
            v_url = url_or_none(compat_urllib_parse_unquote(vimeo_match)) if vimeo_match else None
            if v_url:
                return {
                    **info,
                    '_type': 'url_transparent',
                    'url': VimeoIE._smuggle_referrer(v_url, 'https://patreon.com'),
                    'ie_key': 'Vimeo',
                }

        embed_url = try_get(attributes, lambda x: x['embed']['url'])
        if embed_url:
            return {
                **info,
                '_type': 'url',
                'url': embed_url,
            }

        post_file = traverse_obj(attributes, 'post_file')
        if post_file:
            name = post_file.get('name')
            ext = determine_ext(name)
            if ext in KNOWN_EXTENSIONS:
                return {
                    **info,
                    'ext': ext,
                    'url': post_file['url'],
                }
            elif name == 'video':
                formats, subtitles = self._extract_m3u8_formats_and_subtitles(post_file['url'], video_id)
                return {
                    **info,
                    'formats': formats,
                    'subtitles': subtitles,
                }

        if can_view_post is False:
            self.raise_no_formats('You do not have access to this post', video_id=video_id, expected=True)
        else:
            self.raise_no_formats('No supported media found in this post', video_id=video_id, expected=True)
        return info

    def _get_comments(self, post_id):
        """Yield comment dicts for a post, newest first, following cursor pagination.

        Each page is requested with the ID of the last top-level comment of the
        previous page as cursor. Paging stops once as many comments have been
        seen as the response's 'meta.count' reports, or when that count or the
        next cursor is unavailable.
        """
        cursor = None
        count = 0
        params = {
            'page[count]': 50,
            'include': 'parent.commenter.campaign,parent.post.user,parent.post.campaign.creator,parent.replies.parent,parent.replies.commenter.campaign,parent.replies.post.user,parent.replies.post.campaign.creator,commenter.campaign,post.user,post.campaign.creator,replies.parent,replies.commenter.campaign,replies.post.user,replies.post.campaign.creator,on_behalf_of_campaign',
            'fields[comment]': 'body,created,is_by_creator',
            'fields[user]': 'image_url,full_name,url',
            'filter[flair]': 'image_tiny_url,name',
            'sort': '-created',
            'json-api-version': 1.0,
            'json-api-use-default-includes': 'false',
        }

        for page in itertools.count(1):

            params.update({'page[cursor]': cursor} if cursor else {})
            response = self._call_api(
                f'posts/{post_id}/comments', post_id, query=params, note='Downloading comments page %d' % page)

            cursor = None
            # Comments come both from 'data' and from 'included' items of type 'comment'
            for comment in traverse_obj(response, (('data', ('included', lambda _, v: v['type'] == 'comment')), ...), default=[]):
                count += 1
                comment_id = comment.get('id')
                attributes = comment.get('attributes') or {}
                if comment_id is None:
                    continue
                author_id = traverse_obj(comment, ('relationships', 'commenter', 'data', 'id'))
                author_info = traverse_obj(
                    response, ('included', lambda _, v: v['id'] == author_id and v['type'] == 'user', 'attributes'),
                    get_all=False, expected_type=dict, default={})

                yield {
                    'id': comment_id,
                    'text': attributes.get('body'),
                    'timestamp': parse_iso8601(attributes.get('created')),
                    'parent': traverse_obj(comment, ('relationships', 'parent', 'data', 'id'), default='root'),
                    'author_is_uploader': attributes.get('is_by_creator'),
                    'author_id': author_id,
                    'author': author_info.get('full_name'),
                    'author_thumbnail': author_info.get('image_url'),
                }

            # A missing total must end pagination instead of raising on `int < None`
            total = int_or_none(traverse_obj(response, ('meta', 'count')))
            if total is not None and count < total:
                cursor = traverse_obj(response, ('data', -1, 'id'))

            if cursor is None:
                break
340 | ||
341 | ||
class PatreonCampaignIE(PatreonBaseIE):
    """Extractor for a Patreon campaign (creator page), returned as a playlist of its posts."""

    _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?!rss)(?:(?:m/(?P<campaign_id>\d+))|(?P<vanity>[-\w]+))'
    _TESTS = [{
        'url': 'https://www.patreon.com/dissonancepod/',
        'info_dict': {
            'title': 'Cognitive Dissonance Podcast',
            'channel_url': 'https://www.patreon.com/dissonancepod',
            'id': '80642',
            'description': 'md5:eb2fa8b83da7ab887adeac34da6b7af7',
            'channel_id': '80642',
            'channel': 'Cognitive Dissonance Podcast',
            'age_limit': 0,
            'channel_follower_count': int,
            'uploader_id': '87145',
            'uploader_url': 'https://www.patreon.com/dissonancepod',
            'uploader': 'Cognitive Dissonance Podcast',
            'thumbnail': r're:^https?://.*$',
        },
        'playlist_mincount': 68,
    }, {
        'url': 'https://www.patreon.com/m/4767637/posts',
        'info_dict': {
            'title': 'Not Just Bikes',
            'channel_follower_count': int,
            'id': '4767637',
            'channel_id': '4767637',
            'channel_url': 'https://www.patreon.com/notjustbikes',
            'description': 'md5:595c6e7dca76ae615b1d38c298a287a1',
            'age_limit': 0,
            'channel': 'Not Just Bikes',
            'uploader_url': 'https://www.patreon.com/notjustbikes',
            'uploader': 'Not Just Bikes',
            'uploader_id': '37306634',
            'thumbnail': r're:^https?://.*$',
        },
        'playlist_mincount': 71
    }, {
        'url': 'https://www.patreon.com/dissonancepod/posts',
        'only_matching': True
    }, {
        'url': 'https://www.patreon.com/m/5932659',
        'only_matching': True
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that PatreonIE already handles; otherwise defer to the default check."""
        return False if PatreonIE.suitable(url) else super().suitable(url)

    def _entries(self, campaign_id):
        """Yield a URL result for every post of the campaign, newest first.

        Pages are fetched until the response no longer provides a 'next'
        cursor. Posts without a valid 'patreon_url' are skipped.
        """
        cursor = None
        params = {
            'fields[post]': 'patreon_url,url',
            'filter[campaign_id]': campaign_id,
            'filter[is_draft]': 'false',
            'sort': '-published_at',
            'json-api-use-default-includes': 'false',
        }

        for page in itertools.count(1):

            params.update({'page[cursor]': cursor} if cursor else {})
            posts_json = self._call_api('posts', campaign_id, query=params, note='Downloading posts page %d' % page)

            cursor = traverse_obj(posts_json, ('meta', 'pagination', 'cursors', 'next'))
            for post in posts_json.get('data') or []:
                post_url = url_or_none(traverse_obj(post, ('attributes', 'patreon_url')))
                if not post_url:
                    # An entry without a URL cannot be resolved later; skip it
                    continue
                yield self.url_result(post_url, 'Patreon')

            if cursor is None:
                break

    def _real_extract(self, url):
        """Build the playlist result for a campaign.

        The campaign ID is taken from the URL when present; otherwise it is
        scraped from the creator page. Campaign metadata is fetched non-fatally,
        so metadata fields are simply empty when that request fails.
        """
        campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity')
        if campaign_id is None:
            webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.USER_AGENT})
            campaign_id = self._search_regex(r'https://www\.patreon\.com/api/campaigns/(\d+)/?', webpage, 'Campaign ID')

        params = {
            'json-api-use-default-includes': 'false',
            'fields[user]': 'full_name,url',
            'fields[campaign]': 'name,summary,url,patron_count,creation_count,is_nsfw,avatar_photo_url',
            'include': 'creator'
        }

        campaign_response = self._call_api(
            f'campaigns/{campaign_id}', campaign_id,
            note='Downloading campaign info', fatal=False,
            query=params) or {}

        campaign_info = campaign_response.get('data') or {}
        channel_name = traverse_obj(campaign_info, ('attributes', 'name'))
        # First included resource of type 'user' (the only include requested is 'creator')
        user_info = traverse_obj(
            campaign_response, ('included', lambda _, v: v['type'] == 'user'),
            default={}, expected_type=dict, get_all=False)

        return {
            '_type': 'playlist',
            'id': campaign_id,
            'title': channel_name,
            'entries': self._entries(campaign_id),
            'description': clean_html(traverse_obj(campaign_info, ('attributes', 'summary'))),
            'channel_url': traverse_obj(campaign_info, ('attributes', 'url')),
            'channel_follower_count': int_or_none(traverse_obj(campaign_info, ('attributes', 'patron_count'))),
            'channel_id': campaign_id,
            'channel': channel_name,
            'uploader_url': traverse_obj(user_info, ('attributes', 'url')),
            'uploader_id': str_or_none(user_info.get('id')),
            'uploader': traverse_obj(user_info, ('attributes', 'full_name')),
            'playlist_count': traverse_obj(campaign_info, ('attributes', 'creation_count')),
            'age_limit': 18 if traverse_obj(campaign_info, ('attributes', 'is_nsfw')) else 0,
            'thumbnail': url_or_none(traverse_obj(campaign_info, ('attributes', 'avatar_photo_url'))),
        }