jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/floatplane.py
[cleanup] Misc (#8598)
[yt-dlp.git] / yt_dlp / extractor / floatplane.py
1 import functools
2
3 from .common import InfoExtractor
4 from ..utils import (
5 ExtractorError,
6 OnDemandPagedList,
7 clean_html,
8 determine_ext,
9 format_field,
10 int_or_none,
11 join_nonempty,
12 parse_codecs,
13 parse_iso8601,
14 urljoin,
15 )
16 from ..utils.traversal import traverse_obj
17
18
class FloatplaneIE(InfoExtractor):
    """Extractor for individual Floatplane posts (``floatplane.com/post/<id>``).

    Every video/audio attachment listed on the post becomes one media entry.
    A post with exactly one attachment is returned as a single entry merged
    with the post-level metadata; a post with several attachments is returned
    as a playlist of those entries.
    """
    _VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/post/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.floatplane.com/post/2Yf3UedF7C',
        'info_dict': {
            'id': 'yuleLogLTT',
            'ext': 'mp4',
            'display_id': '2Yf3UedF7C',
            'title': '8K Yule Log Fireplace with Crackling Fire Sounds - 10 Hours',
            'description': 'md5:adf2970e0de1c5e3df447818bb0309f6',
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'duration': 36035,
            'comment_count': int,
            'like_count': int,
            'dislike_count': int,
            'release_date': '20191206',
            'release_timestamp': 1575657000,
            'uploader': 'LinusTechTips',
            'uploader_id': '59f94c0bdd241b70349eb72b',
            'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home',
            'channel': 'Linus Tech Tips',
            'channel_id': '63fe42c309e691e4e36de93d',
            'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/main',
            'availability': 'subscriber_only',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.floatplane.com/post/j2jqG3JmgJ',
        'info_dict': {
            'id': 'j2jqG3JmgJ',
            'title': 'TJM: Does Anyone Care About Avatar: The Way of Water?',
            'description': 'md5:00bf17dc5733e4031e99b7fd6489f274',
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'comment_count': int,
            'like_count': int,
            'dislike_count': int,
            'release_timestamp': 1671915900,
            'release_date': '20221224',
            'uploader': 'LinusTechTips',
            'uploader_id': '59f94c0bdd241b70349eb72b',
            'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home',
            'channel': "They're Just Movies",
            'channel_id': '64135f82fc76ab7f9fbdc876',
            'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/tajm',
            'availability': 'subscriber_only',
        },
        'playlist_count': 2,
    }, {
        'url': 'https://www.floatplane.com/post/3tK2tInhoN',
        'info_dict': {
            'id': '3tK2tInhoN',
            'title': 'Extras - How Linus Communicates with Editors (Compensator 4)',
            'description': 'md5:83cd40aae1ce124df33769600c80ca5b',
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'comment_count': int,
            'like_count': int,
            'dislike_count': int,
            'release_timestamp': 1700529120,
            'release_date': '20231121',
            'uploader': 'LinusTechTips',
            'uploader_id': '59f94c0bdd241b70349eb72b',
            'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home',
            'channel': 'FP Exclusives',
            'channel_id': '6413623f5b12cca228a28e78',
            'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/fpexclusive',
            'availability': 'subscriber_only',
        },
        'playlist_count': 2,
    }, {
        'url': 'https://beta.floatplane.com/post/d870PEFXS1',
        'info_dict': {
            'id': 'bg9SuYKEww',
            'ext': 'mp4',
            'display_id': 'd870PEFXS1',
            'title': 'LCS Drama, TLOU 2 Remaster, Destiny 2 Player Count Drops, + More!',
            'description': 'md5:80d612dcabf41b17487afcbe303ec57d',
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'release_timestamp': 1700622000,
            'release_date': '20231122',
            'duration': 513,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'uploader': 'LinusTechTips',
            'uploader_id': '59f94c0bdd241b70349eb72b',
            'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home',
            'channel': 'GameLinked',
            'channel_id': '649dbade3540dbc3945eeda7',
            'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/gamelinked',
            'availability': 'subscriber_only',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_initialize(self):
        """Demand login when the ``sails.sid`` cookie for floatplane.com is missing."""
        if not self._get_cookies('https://www.floatplane.com').get('sails.sid'):
            self.raise_login_required()

    @staticmethod
    def _fill_quality_path(path_template, params):
        """Substitute ``{qualityLevelParams.<key>}`` placeholders in *path_template*.

        *params* maps placeholder keys to their replacement strings and may be
        None or empty. Returns None when there is no template to fill, so the
        caller can skip the quality level instead of failing on a missing path.
        """
        if not path_template:
            return None
        path = path_template
        for key, value in (params or {}).items():
            path = path.replace(f'{{qualityLevelParams.{key}}}', value)
        return path

    def _build_attachment_formats(self, stream):
        """Turn a CDN delivery response into a list of format dicts.

        One format is produced per entry of ``resource.data.qualityLevels``; its
        URL is ``resource.uri`` with the per-quality parameters filled in, joined
        onto the ``cdn`` base of the response.
        """
        path_template = traverse_obj(stream, ('resource', 'uri', {str}))

        formats = []
        for quality in traverse_obj(stream, ('resource', 'data', 'qualityLevels', ...)):
            # The {dict} filter discards a malformed (non-mapping) parameter entry
            quality_params = traverse_obj(
                stream, ('resource', 'data', 'qualityLevelParams', quality['name'], {dict}))
            format_url = urljoin(
                stream['cdn'], self._fill_quality_path(path_template, quality_params))
            if not format_url:
                # No URL could be assembled for this quality level; skip it rather
                # than crash on `None.partition` when deriving the extension
                continue
            formats.append({
                **traverse_obj(quality, {
                    'format_id': 'name',
                    'format_note': 'label',
                    'width': ('width', {int}),
                    'height': ('height', {int}),
                }),
                **parse_codecs(quality.get('codecs')),
                'url': format_url,
                # The extension is taken from the part of the URL preceding the
                # '/chunk.m3u8' suffix, falling back to mp4
                'ext': determine_ext(format_url.partition('/chunk.m3u8')[0], 'mp4'),
            })
        return formats

    def _extract_attachment(self, media):
        """Download metadata and stream data for one attachment and build its entry.

        *media* is one item of the post's ``videoAttachments``/``audioAttachments``
        lists. Returns a dict with ``id``, ``formats`` and whichever of ``title``,
        ``duration`` and ``thumbnail`` the attachment metadata provides.
        """
        media_id = media['id']
        # Attachments without an explicit type are treated as video
        media_typ = media.get('type') or 'video'

        metadata = self._download_json(
            f'https://www.floatplane.com/api/v3/content/{media_typ}', media_id, query={'id': media_id},
            note=f'Downloading {media_typ} metadata')

        stream = self._download_json(
            'https://www.floatplane.com/api/v2/cdn/delivery', media_id, query={
                'type': 'vod' if media_typ == 'video' else 'aod',
                'guid': metadata['guid'],
            }, note=f'Downloading {media_typ} stream data')

        return {
            'id': media_id,
            **traverse_obj(metadata, {
                'title': 'title',
                'duration': ('duration', {int_or_none}),
                'thumbnail': ('thumbnail', 'path'),
            }),
            'formats': self._build_attachment_formats(stream),
        }

    def _real_extract(self, url):
        """Extract the post at *url* as a single entry or a playlist of attachments.

        Raises ExtractorError when the post metadata reports neither video nor
        audio, or when no attachment is listed on the post.
        """
        post_id = self._match_id(url)

        post_data = self._download_json(
            'https://www.floatplane.com/api/v3/content/post', post_id, query={'id': post_id},
            note='Downloading post data', errnote='Unable to download post data')

        if not any(traverse_obj(post_data, ('metadata', ('hasVideo', 'hasAudio')))):
            raise ExtractorError('Post does not contain a video or audio track', expected=True)

        items = [
            self._extract_attachment(media)
            for media in traverse_obj(post_data, (('videoAttachments', 'audioAttachments'), ...))
        ]
        if not items:
            # The metadata flags promised media but no attachment was listed;
            # fail with a clear message instead of an IndexError further down
            raise ExtractorError('Unable to find any video or audio attachments in post')

        uploader_url = format_field(
            post_data, [('creator', 'urlname')], 'https://www.floatplane.com/channel/%s/home') or None
        channel_url = urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname')))

        post_info = {
            'id': post_id,
            'display_id': post_id,
            **traverse_obj(post_data, {
                'title': 'title',
                'description': ('text', {clean_html}),
                'uploader': ('creator', 'title'),
                'uploader_id': ('creator', 'id'),
                'channel': ('channel', 'title'),
                'channel_id': ('channel', 'id'),
                'like_count': ('likes', {int_or_none}),
                'dislike_count': ('dislikes', {int_or_none}),
                'comment_count': ('comments', {int_or_none}),
                'release_timestamp': ('releaseDate', {parse_iso8601}),
                'thumbnail': ('thumbnail', 'path'),
            }),
            'uploader_url': uploader_url,
            'channel_url': channel_url,
            'availability': self._availability(needs_subscription=True),
        }

        if len(items) > 1:
            return self.playlist_result(items, **post_info)

        # Single attachment: its fields (id, title, formats, ...) take precedence
        # over the post-level ones, while `display_id` keeps the post ID
        post_info.update(items[0])
        return post_info
206
207
class FloatplaneChannelIE(InfoExtractor):
    """Extractor for Floatplane creator pages and their optional sub-channels.

    Matches ``floatplane.com/channel/<creator>/home[/<channel>]`` and yields the
    listed posts as a lazily paged playlist of `FloatplaneIE` URL results.
    """
    _VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/channel/(?P<id>[\w-]+)/home(?:/(?P<channel>[\w-]+))?'
    _PAGE_SIZE = 20
    _TESTS = [{
        'url': 'https://www.floatplane.com/channel/linustechtips/home/ltxexpo',
        'info_dict': {
            'id': 'linustechtips/ltxexpo',
            'title': 'LTX Expo',
            'description': 'md5:9819002f9ebe7fd7c75a3a1d38a59149',
        },
        'playlist_mincount': 51,
    }, {
        'url': 'https://www.floatplane.com/channel/ShankMods/home',
        'info_dict': {
            'id': 'ShankMods',
            'title': 'Shank Mods',
            'description': 'md5:6dff1bb07cad8e5448e04daad9be1b30',
        },
        'playlist_mincount': 14,
    }, {
        'url': 'https://beta.floatplane.com/channel/bitwit_ultra/home',
        'info_dict': {
            'id': 'bitwit_ultra',
            'title': 'Bitwit Ultra',
            'description': 'md5:1452f280bb45962976d4789200f676dd',
        },
        'playlist_mincount': 200,
    }]

    def _fetch_page(self, display_id, creator_id, channel_id, page):
        """Yield one URL result per post on the zero-based *page* of the listing.

        The request is restricted to *channel_id* only when one is given.
        """
        request_query = {
            'id': creator_id,
            'limit': self._PAGE_SIZE,
            # Offset of the first post of the requested page
            'fetchAfter': page * self._PAGE_SIZE,
            **({'channel': channel_id} if channel_id else {}),
        }
        posts = self._download_json(
            'https://www.floatplane.com/api/v3/content/creator', display_id,
            query=request_query, note=f'Downloading page {page + 1}') or []

        for post in posts:
            post_id = post['id']
            yield self.url_result(
                f'https://www.floatplane.com/post/{post_id}',
                FloatplaneIE, id=post_id, title=post.get('title'),
                release_timestamp=parse_iso8601(post.get('releaseDate')))

    def _real_extract(self, url):
        """Resolve the creator (and sub-channel, if named) and return its playlist."""
        creator, channel = self._match_valid_url(url).group('id', 'channel')
        display_id = join_nonempty(creator, channel, delim='/')

        creators = self._download_json(
            'https://www.floatplane.com/api/v3/creator/named',
            display_id, query={'creatorURL[0]': creator})
        creator_data = creators[0]

        def is_requested_channel(_, candidate):
            return candidate['urlname'] == channel

        # Falls back to an empty dict when no channel of the creator matches
        channel_data = traverse_obj(
            creator_data, ('channels', is_requested_channel), get_all=False) or {}

        page_fetcher = functools.partial(
            self._fetch_page, display_id, creator_data['id'], channel_data.get('id'))
        entries = OnDemandPagedList(page_fetcher, self._PAGE_SIZE)

        # Channel-level title/description win; creator-level values are the fallback
        playlist_title = channel_data.get('title') or creator_data.get('title')
        playlist_description = channel_data.get('about') or creator_data.get('about')
        return self.playlist_result(
            entries, display_id, title=playlist_title, description=playlist_description)