]>
Commit | Line | Data |
---|---|---|
b532556d SS |
1 | import json |
2 | from datetime import date | |
3 | from urllib.parse import unquote | |
45b2ee6f | 4 | |
5 | from .common import InfoExtractor | |
b532556d SS |
6 | from ..compat import functools |
7 | from ..utils import ExtractorError, make_archive_id, urljoin | |
8 | from ..utils.traversal import traverse_obj | |
45b2ee6f | 9 | |
10 | ||
b532556d SS |
class Pr0grammIE(InfoExtractor):
    """Extractor for single video posts on pr0gramm.com.

    Post data is fetched from the ``/api/items`` endpoints. Tags are only
    requested when a login cookie is present, and the content flags sent to
    the API are derived from the cookies of the current session.
    """

    _VALID_URL = r'https?://pr0gramm\.com\/(?:[^/?#]+/)+(?P<id>[\d]+)(?:[/?#:]|$)'
    _TESTS = [{
        # Tags require account
        'url': 'https://pr0gramm.com/new/video/5466437',
        'info_dict': {
            'id': '5466437',
            'ext': 'mp4',
            'title': 'pr0gramm-5466437 by g11st',
            'tags': ['Neon Genesis Evangelion', 'Touhou Project', 'Fly me to the Moon', 'Marisad', 'Marisa Kirisame', 'video', 'sound', 'Marisa', 'Anime'],
            'uploader': 'g11st',
            'uploader_id': 394718,
            'upload_timestamp': 1671590240,
            'upload_date': '20221221',
            'like_count': int,
            'dislike_count': int,
            'age_limit': 0,
            'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
        },
    }, {
        # Tags require account
        'url': 'https://pr0gramm.com/new/3052805:comment28391322',
        'info_dict': {
            'id': '3052805',
            'ext': 'mp4',
            'title': 'pr0gramm-3052805 by Hansking1',
            'tags': 'count:15',
            'uploader': 'Hansking1',
            'uploader_id': 385563,
            'upload_timestamp': 1552930408,
            'upload_date': '20190318',
            'like_count': int,
            'dislike_count': int,
            'age_limit': 0,
            'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
        },
    }, {
        # Requires verified account
        'url': 'https://pr0gramm.com/new/Gianna%20Michaels/5848332',
        'info_dict': {
            'id': '5848332',
            'ext': 'mp4',
            'title': 'pr0gramm-5848332 by erd0pfel',
            'tags': 'count:18',
            'uploader': 'erd0pfel',
            'uploader_id': 349094,
            'upload_timestamp': 1694489652,
            'upload_date': '20230912',
            'like_count': int,
            'dislike_count': int,
            'age_limit': 18,
            'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
        },
    }, {
        'url': 'https://pr0gramm.com/static/5466437',
        'only_matching': True,
    }, {
        'url': 'https://pr0gramm.com/new/rowan%20atkinson%20herr%20bohne/3052805',
        'only_matching': True,
    }, {
        'url': 'https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290',
        'only_matching': True,
    }]

    BASE_URL = 'https://pr0gramm.com'

    @functools.cached_property
    def _is_logged_in(self):
        """Whether a ``pp`` cookie exists for ``BASE_URL`` (used as the login indicator)."""
        return 'pp' in self._get_cookies(self.BASE_URL)

    @functools.cached_property
    def _maximum_flags(self):
        """Bitmask of the content flags to request from the API.

        Starts with only the lowest (sfw) bit set; bit ``0b1000`` is added
        when logged in, and bits ``0b0110`` when the ``me`` cookie reports a
        verified account.
        """
        # We need to guess the flags for the content otherwise the api will raise an error
        # We can guess the maximum allowed flags for the account from the cookies
        # Bitflags are (msbf): nsfp, nsfl, nsfw, sfw
        flags = 0b0001
        if self._is_logged_in:
            flags |= 0b1000
            cookies = self._get_cookies(self.BASE_URL)
            if 'me' not in cookies:
                self._download_webpage(self.BASE_URL, None, 'Refreshing verification information')
                # Read the cookies again: the object fetched before the request
                # may not reflect cookies that the refresh has just set
                cookies = self._get_cookies(self.BASE_URL)
            if traverse_obj(cookies, ('me', {lambda x: x.value}, {unquote}, {json.loads}, 'verified')):
                flags |= 0b0110

        return flags

    def _call_api(self, endpoint, video_id, query=None, note='Downloading API json'):
        """Query ``/api/items/<endpoint>`` and return the decoded JSON.

        @param endpoint  name of the items endpoint, e.g. ``get`` or ``info``
        @param video_id  ID used for progress and error reporting
        @param query     optional dict of query parameters
        @param note      progress message shown while downloading

        HTTP 403 responses are accepted so that the error payload can be
        inspected. Raises a login-required error or ``ExtractorError`` when
        the payload carries an ``error`` field.
        """
        data = self._download_json(
            f'{self.BASE_URL}/api/items/{endpoint}',
            video_id, note, query=query or {}, expected_status=403)

        error = traverse_obj(data, ('error', {str}))
        if error in ('nsfwRequired', 'nsflRequired', 'nsfpRequired', 'verificationRequired'):
            if not self._is_logged_in:
                self.raise_login_required()
            raise ExtractorError(f'Unverified account cannot access NSFW/NSFL ({error})', expected=True)
        elif error:
            message = traverse_obj(data, ('msg', {str})) or error
            raise ExtractorError(f'API returned error: {message}', expected=True)

        return data

    @staticmethod
    def _format_upload_date(timestamp):
        """Return the UTC calendar date of a Unix *timestamp* as ``YYYYMMDD``.

        ``date.fromtimestamp`` would use the local timezone of the machine,
        making the result differ between hosts; counting whole days since
        the epoch keeps it in UTC.
        """
        epoch_day = date(1970, 1, 1).toordinal()
        return date.fromordinal(epoch_day + timestamp // 86400).strftime('%Y%m%d')

    def _real_extract(self, url):
        """Build the info dict for the post referenced by *url*."""
        video_id = self._match_id(url)
        video_info = traverse_obj(
            self._call_api('get', video_id, {'id': video_id, 'flags': self._maximum_flags}),
            ('items', 0, {dict}))
        if not video_info:
            # Without this guard the attribute access below fails with an AttributeError
            raise ExtractorError('API response did not contain the requested item', video_id=video_id)

        source = urljoin('https://img.pr0gramm.com', video_info.get('image'))
        formats = []
        if source and source.endswith('mp4'):
            formats.append({
                'url': source,
                'ext': 'mp4',
                **traverse_obj(video_info, {
                    'width': ('width', {int}),
                    'height': ('height', {int}),
                }),
            })
        else:
            # Should this call return instead of raising, no bogus format is reported
            self.raise_no_formats('Could not extract a video', expected=bool(source), video_id=video_id)

        tags = None
        if self._is_logged_in:
            metadata = self._call_api('info', video_id, {'itemId': video_id})
            tags = traverse_obj(metadata, ('tags', ..., 'tag', {str}))
            # Sorted by "confidence", higher confidence = earlier in list
            confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float})))
            # Only pair them up when every tag has a confidence; otherwise zip() would misalign them
            if confidences and len(confidences) == len(tags):
                tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)]

        return {
            'id': video_id,
            'title': f'pr0gramm-{video_id} by {video_info.get("user")}',
            'formats': formats,
            'tags': tags,
            # Either of the two middle flag bits (nsfl/nsfw) marks the post as adult content
            'age_limit': 18 if traverse_obj(video_info, ('flags', {int}, {0b110.__and__})) else 0,
            '_old_archive_ids': [make_archive_id('Pr0grammStatic', video_id)],
            **traverse_obj(video_info, {
                'uploader': ('user', {str}),
                'uploader_id': ('userId', {int}),
                'like_count': ('up', {int}),
                'dislike_count': ('down', {int}),
                'upload_timestamp': ('created', {int}),
                'upload_date': ('created', {int}, {self._format_upload_date}),
                'thumbnail': ('thumb', {lambda x: urljoin('https://thumb.pr0gramm.com', x)}),
            }),
        }