]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/pr0gramm.py
[cleanup] Misc (#8598)
[yt-dlp.git] / yt_dlp / extractor / pr0gramm.py
1 import json
2 from datetime import date
3 from urllib.parse import unquote
4
5 from .common import InfoExtractor
6 from ..compat import functools
7 from ..utils import (
8 ExtractorError,
9 float_or_none,
10 int_or_none,
11 make_archive_id,
12 mimetype2ext,
13 urljoin,
14 )
15 from ..utils.traversal import traverse_obj
16
17
class Pr0grammIE(InfoExtractor):
    """Extractor for single video posts on pr0gramm.com.

    Item data comes from the `items/get` API endpoint; tags are fetched from
    `items/info` only when the session is logged in.
    """
    _VALID_URL = r'https?://pr0gramm\.com\/(?:[^/?#]+/)+(?P<id>[\d]+)(?:[/?#:]|$)'
    _TESTS = [{
        # Tags require account
        'url': 'https://pr0gramm.com/new/video/5466437',
        'info_dict': {
            'id': '5466437',
            'ext': 'mp4',
            'title': 'pr0gramm-5466437 by g11st',
            'tags': ['Neon Genesis Evangelion', 'Touhou Project', 'Fly me to the Moon', 'Marisad', 'Marisa Kirisame', 'video', 'sound', 'Marisa', 'Anime'],
            'uploader': 'g11st',
            'uploader_id': 394718,
            'upload_timestamp': 1671590240,
            'upload_date': '20221221',
            'like_count': int,
            'dislike_count': int,
            'age_limit': 0,
            'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
            '_old_archive_ids': ['pr0grammstatic 5466437'],
        },
    }, {
        # Tags require account
        'url': 'https://pr0gramm.com/new/3052805:comment28391322',
        'info_dict': {
            'id': '3052805',
            'ext': 'mp4',
            'title': 'pr0gramm-3052805 by Hansking1',
            'tags': 'count:15',
            'uploader': 'Hansking1',
            'uploader_id': 385563,
            'upload_timestamp': 1552930408,
            'upload_date': '20190318',
            'like_count': int,
            'dislike_count': int,
            'age_limit': 0,
            'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
            '_old_archive_ids': ['pr0grammstatic 3052805'],
        },
    }, {
        # Requires verified account
        'url': 'https://pr0gramm.com/new/Gianna%20Michaels/5848332',
        'info_dict': {
            'id': '5848332',
            'ext': 'mp4',
            'title': 'pr0gramm-5848332 by erd0pfel',
            'tags': 'count:18',
            'uploader': 'erd0pfel',
            'uploader_id': 349094,
            'upload_timestamp': 1694489652,
            'upload_date': '20230912',
            'like_count': int,
            'dislike_count': int,
            'age_limit': 18,
            'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
            '_old_archive_ids': ['pr0grammstatic 5848332'],
        },
    }, {
        'url': 'https://pr0gramm.com/static/5466437',
        'only_matching': True,
    }, {
        'url': 'https://pr0gramm.com/new/rowan%20atkinson%20herr%20bohne/3052805',
        'only_matching': True,
    }, {
        'url': 'https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290',
        'only_matching': True,
    }]

    BASE_URL = 'https://pr0gramm.com'

    @functools.cached_property
    def _is_logged_in(self):
        """Whether the session carries the `pp` cookie for the site."""
        return 'pp' in self._get_cookies(self.BASE_URL)

    @functools.cached_property
    def _maximum_flags(self):
        """Bitmask of the content flags to request from the API.

        We need to guess the flags for the content, otherwise the API will raise an error.
        The maximum allowed flags for the account are guessed from the cookies.
        Bitflags are (msbf): nsfp, nsfl, nsfw, sfw
        """
        flags = 0b0001
        if not self._is_logged_in:
            return flags

        flags |= 0b1000
        cookies = self._get_cookies(self.BASE_URL)
        if 'me' not in cookies:
            self._download_webpage(self.BASE_URL, None, 'Refreshing verification information')
            # Read the cookies again: the object fetched above was built before
            # the request and would not contain a `me` cookie set by it
            cookies = self._get_cookies(self.BASE_URL)
        if traverse_obj(cookies, ('me', {lambda x: x.value}, {unquote}, {json.loads}, 'verified')):
            flags |= 0b0110

        return flags

    def _call_api(self, endpoint, video_id, query=None, note='Downloading API json'):
        """Call `/api/items/<endpoint>` and return the decoded JSON response.

        @param endpoint    Name of the endpoint below `/api/items/`
        @param video_id    ID used for log messages
        @param query       Optional dict of query parameters
        @param note        Message shown while downloading

        Raises ExtractorError (expected) if the response carries an `error` field;
        for the flag/verification errors of a logged-out session,
        `raise_login_required()` is called instead.
        """
        # HTTP 403 is accepted so that the error payload can be inspected below
        data = self._download_json(
            f'{self.BASE_URL}/api/items/{endpoint}',
            video_id, note, query=query or {}, expected_status=403)

        error = traverse_obj(data, ('error', {str}))
        if error in ('nsfwRequired', 'nsflRequired', 'nsfpRequired', 'verificationRequired'):
            if not self._is_logged_in:
                self.raise_login_required()
            raise ExtractorError(f'Unverified account cannot access NSFW/NSFL ({error})', expected=True)
        elif error:
            message = traverse_obj(data, ('msg', {str})) or error
            raise ExtractorError(f'API returned error: {message}', expected=True)

        return data

    @staticmethod
    def _create_source_url(path):
        """Resolve a media path from the API against the image host."""
        return urljoin('https://img.pr0gramm.com', path)

    @staticmethod
    def _format_utc_date(timestamp):
        """Format an integer Unix timestamp as a `YYYYMMDD` date in UTC."""
        # date.fromtimestamp() converts using the local timezone, which can shift
        # the result by a day; count whole days from the epoch instead
        epoch_ordinal = date(1970, 1, 1).toordinal()
        return date.fromordinal(epoch_ordinal + timestamp // 86400).strftime('%Y%m%d')

    @staticmethod
    def _sort_tags(metadata):
        """Return the tag names of an `info` response, highest confidence first.

        The original order is kept if no entry carries a numeric confidence.
        Entries without a numeric confidence are placed last otherwise.
        """
        entries = traverse_obj(metadata, ('tags', lambda _, v: isinstance(v['tag'], str)))

        def confidence(entry):
            value = entry.get('confidence')
            return value if isinstance(value, (int, float)) else None

        if any(confidence(entry) is not None for entry in entries):
            # Tag and confidence are taken from the same entry so that an entry
            # missing one of them cannot shift the pairing of the others
            entries = sorted(entries, key=lambda entry: (
                float('-inf') if confidence(entry) is None else confidence(entry),
                entry['tag']), reverse=True)

        return [entry['tag'] for entry in entries]

    def _real_extract(self, url):
        """Build the info dict for the post referenced by `url`."""
        video_id = self._match_id(url)
        # Fall back to an empty dict so that a response without items is
        # reported through raise_no_formats() instead of an AttributeError
        video_info = traverse_obj(
            self._call_api('get', video_id, {'id': video_id, 'flags': self._maximum_flags}),
            ('items', 0, {dict})) or {}

        source = traverse_obj(video_info, ('image', {str}))
        if not source or not source.endswith('mp4'):
            self.raise_no_formats('Could not extract a video', expected=bool(source), video_id=video_id)

        tags = None
        if self._is_logged_in:
            metadata = self._call_api('info', video_id, {'itemId': video_id}, note='Downloading tags')
            # Sorted by "confidence", higher confidence = earlier in list
            tags = self._sort_tags(metadata)

        if video_info.get('variants'):
            # NOTE(review): the `bitrate` key is kept as-is; confirm whether it
            # is meant to populate one of the standard bitrate fields
            formats = traverse_obj(video_info, ('variants', ..., {
                'format_id': ('name', {str}),
                'url': ('path', {self._create_source_url}),
                'ext': ('mimeType', {mimetype2ext}),
                'vcodec': ('codec', {str}),
                'width': ('width', {int_or_none}),
                'height': ('height', {int_or_none}),
                'bitrate': ('bitRate', {float_or_none}),
                'filesize': ('fileSize', {int_or_none}),
            }))
        else:
            source_format = traverse_obj(video_info, {
                'url': ('image', {self._create_source_url}),
                'width': ('width', {int_or_none}),
                'height': ('height', {int_or_none}),
            })
            # Without a URL there is nothing to offer as a format
            formats = [{
                'ext': 'mp4',
                'format_id': 'source',
                **source_format,
            }] if source_format.get('url') else []

        subtitles = {}
        for subtitle in traverse_obj(video_info, ('subtitles', lambda _, v: v['language'])):
            entry = traverse_obj(subtitle, {
                'url': ('path', {self._create_source_url}),
                'note': ('label', {str}),
            })
            # Skip entries whose path could not be turned into a URL
            if entry.get('url'):
                subtitles.setdefault(subtitle['language'], []).append(entry)

        return {
            'id': video_id,
            'title': f'pr0gramm-{video_id} by {video_info.get("user")}',
            'tags': tags,
            'formats': formats,
            'subtitles': subtitles,
            # 0b110 covers the nsfl and nsfw bits; `{int}` keeps a non-integer
            # value from reaching `__and__`, which would return a truthy NotImplemented
            'age_limit': 18 if traverse_obj(video_info, ('flags', {int}, {0b110.__and__})) else 0,
            '_old_archive_ids': [make_archive_id('Pr0grammStatic', video_id)],
            **traverse_obj(video_info, {
                'uploader': ('user', {str}),
                'uploader_id': ('userId', {int}),
                'like_count': ('up', {int}),
                'dislike_count': ('down', {int}),
                'upload_timestamp': ('created', {int}),
                'upload_date': ('created', {int}, {self._format_utc_date}),
                'thumbnail': ('thumb', {lambda x: urljoin('https://thumb.pr0gramm.com', x)})
            }),
        }