jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/floatplane.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / floatplane.py
1 import functools
2
3 from .common import InfoExtractor
4 from ..utils import (
5 ExtractorError,
6 OnDemandPagedList,
7 clean_html,
8 determine_ext,
9 format_field,
10 int_or_none,
11 join_nonempty,
12 parse_codecs,
13 parse_iso8601,
14 url_or_none,
15 urljoin,
16 )
17 from ..utils.traversal import traverse_obj
18
19
class FloatplaneIE(InfoExtractor):
    """Extract the video/audio attachments of a single Floatplane post.

    A post with exactly one attachment is returned as a single media entry
    (using the attachment id as ``id`` and the post id as ``display_id``);
    a post with several attachments is returned as a playlist.
    """

    _VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/post/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.floatplane.com/post/2Yf3UedF7C',
        'info_dict': {
            'id': 'yuleLogLTT',
            'ext': 'mp4',
            'display_id': '2Yf3UedF7C',
            'title': '8K Yule Log Fireplace with Crackling Fire Sounds - 10 Hours',
            'description': 'md5:adf2970e0de1c5e3df447818bb0309f6',
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'duration': 36035,
            'comment_count': int,
            'like_count': int,
            'dislike_count': int,
            'release_date': '20191206',
            'release_timestamp': 1575657000,
            'uploader': 'LinusTechTips',
            'uploader_id': '59f94c0bdd241b70349eb72b',
            'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home',
            'channel': 'Linus Tech Tips',
            'channel_id': '63fe42c309e691e4e36de93d',
            'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/main',
            'availability': 'subscriber_only',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.floatplane.com/post/j2jqG3JmgJ',
        'info_dict': {
            'id': 'j2jqG3JmgJ',
            'title': 'TJM: Does Anyone Care About Avatar: The Way of Water?',
            'description': 'md5:00bf17dc5733e4031e99b7fd6489f274',
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'comment_count': int,
            'like_count': int,
            'dislike_count': int,
            'release_timestamp': 1671915900,
            'release_date': '20221224',
            'uploader': 'LinusTechTips',
            'uploader_id': '59f94c0bdd241b70349eb72b',
            'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home',
            'channel': "They're Just Movies",
            'channel_id': '64135f82fc76ab7f9fbdc876',
            'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/tajm',
            'availability': 'subscriber_only',
        },
        'playlist_count': 2,
    }, {
        'url': 'https://www.floatplane.com/post/3tK2tInhoN',
        'info_dict': {
            'id': '3tK2tInhoN',
            'title': 'Extras - How Linus Communicates with Editors (Compensator 4)',
            'description': 'md5:83cd40aae1ce124df33769600c80ca5b',
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'comment_count': int,
            'like_count': int,
            'dislike_count': int,
            'release_timestamp': 1700529120,
            'release_date': '20231121',
            'uploader': 'LinusTechTips',
            'uploader_id': '59f94c0bdd241b70349eb72b',
            'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home',
            'channel': 'FP Exclusives',
            'channel_id': '6413623f5b12cca228a28e78',
            'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/fpexclusive',
            'availability': 'subscriber_only',
        },
        'playlist_count': 2,
    }, {
        'url': 'https://beta.floatplane.com/post/d870PEFXS1',
        'info_dict': {
            'id': 'bg9SuYKEww',
            'ext': 'mp4',
            'display_id': 'd870PEFXS1',
            'title': 'LCS Drama, TLOU 2 Remaster, Destiny 2 Player Count Drops, + More!',
            'description': 'md5:80d612dcabf41b17487afcbe303ec57d',
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'release_timestamp': 1700622000,
            'release_date': '20231122',
            'duration': 513,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'uploader': 'LinusTechTips',
            'uploader_id': '59f94c0bdd241b70349eb72b',
            'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home',
            'channel': 'GameLinked',
            'channel_id': '649dbade3540dbc3945eeda7',
            'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/gamelinked',
            'availability': 'subscriber_only',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.floatplane.com/post/65B5PNoBtf',
        'info_dict': {
            'id': '65B5PNoBtf',
            'description': 'I recorded the inbuilt demo mode for your 90\'s enjoyment, thanks for being Floaties!',
            'display_id': '65B5PNoBtf',
            'like_count': int,
            'release_timestamp': 1701249480,
            'uploader': 'The Trash Network',
            'availability': 'subscriber_only',
            'uploader_id': '61bc20c9a131fb692bf2a513',
            'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
            'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
            'comment_count': int,
            'title': 'The $50 electronic drum kit.',
            'channel_id': '64424fe73cd58cbcf8d8e131',
            'thumbnail': 'https://pbs.floatplane.com/blogPost_thumbnails/65B5PNoBtf/725555379422705_1701247052743.jpeg',
            'dislike_count': int,
            'channel': 'The Drum Thing',
            'release_date': '20231129',
        },
        'playlist_count': 2,
        'playlist': [{
            'info_dict': {
                'id': 'ISPJjexylS',
                'ext': 'mp4',
                'release_date': '20231129',
                'release_timestamp': 1701249480,
                'title': 'The $50 electronic drum kit. .mov',
                'channel_id': '64424fe73cd58cbcf8d8e131',
                'thumbnail': 'https://pbs.floatplane.com/video_thumbnails/ISPJjexylS/335202812134041_1701249383392.jpeg',
                'availability': 'subscriber_only',
                'uploader': 'The Trash Network',
                'duration': 622,
                'channel': 'The Drum Thing',
                'uploader_id': '61bc20c9a131fb692bf2a513',
                'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
                'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
            },
        }, {
            'info_dict': {
                'id': 'qKfxu6fEpu',
                'ext': 'aac',
                'release_date': '20231129',
                'release_timestamp': 1701249480,
                'title': 'Roland TD-7 Demo.m4a',
                'channel_id': '64424fe73cd58cbcf8d8e131',
                'availability': 'subscriber_only',
                'uploader': 'The Trash Network',
                'duration': 114,
                'channel': 'The Drum Thing',
                'uploader_id': '61bc20c9a131fb692bf2a513',
                'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
                'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
            },
        }],
        'skip': 'requires subscription: "The Trash Network"',
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_initialize(self):
        """Require the ``sails.sid`` cookie for floatplane.com before extracting."""
        if not self._get_cookies('https://www.floatplane.com').get('sails.sid'):
            self.raise_login_required()

    @staticmethod
    def _fill_path_template(path_template, params):
        """Replace each ``{qualityLevelParams.<key>}`` placeholder in *path_template*.

        *params* maps placeholder keys to their replacement strings; ``None``
        or an empty mapping leaves the template unchanged.
        """
        path = path_template
        for key, val in (params or {}).items():
            path = path.replace(f'{{qualityLevelParams.{key}}}', val)
        return path

    def _extract_stream_formats(self, stream):
        """Build the list of format dicts described by a CDN delivery response.

        Returns an empty list when the response carries no usable path
        template, and skips any quality level for which no URL can be built,
        so that a malformed response does not crash on ``None``.
        """
        path_template = traverse_obj(stream, ('resource', 'uri', {str}))
        if not path_template:
            return []

        formats = []
        for quality in traverse_obj(stream, ('resource', 'data', 'qualityLevels', ...)):
            # Per-quality substitution values are looked up by the quality's name
            url = urljoin(stream['cdn'], self._fill_path_template(path_template, traverse_obj(
                stream, ('resource', 'data', 'qualityLevelParams', quality['name'], {dict}))))
            if not url:
                # urljoin could not produce a URL; nothing downloadable for this level
                continue
            formats.append({
                **traverse_obj(quality, {
                    'format_id': ('name', {str}),
                    'format_note': ('label', {str}),
                    'width': ('width', {int}),
                    'height': ('height', {int}),
                }),
                **parse_codecs(quality.get('codecs')),
                'url': url,
                # Derive the extension from the part of the URL before '/chunk.m3u8'
                'ext': determine_ext(url.partition('/chunk.m3u8')[0], 'mp4'),
            })
        return formats

    def _extract_media(self, media, common_info):
        """Return the info dict for one attachment of a post.

        *media* is an entry of the post's ``videoAttachments`` or
        ``audioAttachments``; *common_info* holds the post-level fields that
        are copied into every entry.
        """
        media_id = media['id']
        media_typ = media.get('type') or 'video'

        metadata = self._download_json(
            f'https://www.floatplane.com/api/v3/content/{media_typ}', media_id, query={'id': media_id},
            note=f'Downloading {media_typ} metadata')

        stream = self._download_json(
            'https://www.floatplane.com/api/v2/cdn/delivery', media_id, query={
                'type': 'vod' if media_typ == 'video' else 'aod',
                'guid': metadata['guid'],
            }, note=f'Downloading {media_typ} stream data')

        return {
            **common_info,
            'id': media_id,
            **traverse_obj(metadata, {
                'title': ('title', {str}),
                'duration': ('duration', {int_or_none}),
                'thumbnail': ('thumbnail', 'path', {url_or_none}),
            }),
            'formats': self._extract_stream_formats(stream),
        }

    def _real_extract(self, url):
        """Extract a post as a single media entry or as a playlist of its attachments.

        Raises ExtractorError when the post has no video/audio according to
        its metadata, or when it lists no attachments at all.
        """
        post_id = self._match_id(url)

        post_data = self._download_json(
            'https://www.floatplane.com/api/v3/content/post', post_id, query={'id': post_id},
            note='Downloading post data', errnote='Unable to download post data')

        if not any(traverse_obj(post_data, ('metadata', ('hasVideo', 'hasAudio')))):
            raise ExtractorError('Post does not contain a video or audio track', expected=True)

        uploader_url = format_field(
            post_data, [('creator', 'urlname')], 'https://www.floatplane.com/channel/%s/home') or None

        # Fields shared by the post itself and by every attachment entry
        common_info = {
            'uploader_url': uploader_url,
            'channel_url': urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname'))),
            'availability': self._availability(needs_subscription=True),
            **traverse_obj(post_data, {
                'uploader': ('creator', 'title', {str}),
                'uploader_id': ('creator', 'id', {str}),
                'channel': ('channel', 'title', {str}),
                'channel_id': ('channel', 'id', {str}),
                'release_timestamp': ('releaseDate', {parse_iso8601}),
            }),
        }

        items = [
            self._extract_media(media, common_info)
            for media in traverse_obj(post_data, (('videoAttachments', 'audioAttachments'), ...))
        ]
        if not items:
            # The metadata flags claimed media but no attachment was listed;
            # fail with a clear message instead of an IndexError below
            raise ExtractorError('Post does not list any video or audio attachments')

        post_info = {
            **common_info,
            'id': post_id,
            'display_id': post_id,
            **traverse_obj(post_data, {
                'title': ('title', {str}),
                'description': ('text', {clean_html}),
                'like_count': ('likes', {int_or_none}),
                'dislike_count': ('dislikes', {int_or_none}),
                'comment_count': ('comments', {int_or_none}),
                'thumbnail': ('thumbnail', 'path', {url_or_none}),
            }),
        }

        if len(items) > 1:
            return self.playlist_result(items, **post_info)

        # Single attachment: its fields (id, title, formats, ...) override the post's
        post_info.update(items[0])
        return post_info
271
272
class FloatplaneChannelIE(InfoExtractor):
    """Extract all posts of a Floatplane creator, or of one of its channels, as a playlist."""

    _VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/channel/(?P<id>[\w-]+)/home(?:/(?P<channel>[\w-]+))?'
    # Number of posts requested per API page
    _PAGE_SIZE = 20
    _TESTS = [{
        'url': 'https://www.floatplane.com/channel/linustechtips/home/ltxexpo',
        'info_dict': {
            'id': 'linustechtips/ltxexpo',
            'title': 'LTX Expo',
            'description': 'md5:9819002f9ebe7fd7c75a3a1d38a59149',
        },
        'playlist_mincount': 51,
    }, {
        'url': 'https://www.floatplane.com/channel/ShankMods/home',
        'info_dict': {
            'id': 'ShankMods',
            'title': 'Shank Mods',
            'description': 'md5:6dff1bb07cad8e5448e04daad9be1b30',
        },
        'playlist_mincount': 14,
    }, {
        'url': 'https://beta.floatplane.com/channel/bitwit_ultra/home',
        'info_dict': {
            'id': 'bitwit_ultra',
            'title': 'Bitwit Ultra',
            'description': 'md5:1452f280bb45962976d4789200f676dd',
        },
        'playlist_mincount': 200,
    }]

    def _fetch_page(self, display_id, creator_id, channel_id, page):
        """Yield a url_result for every post on the zero-based *page*.

        The request is restricted to *channel_id* only when one is given.
        """
        query = {
            'id': creator_id,
            'limit': self._PAGE_SIZE,
            'fetchAfter': page * self._PAGE_SIZE,
        }
        if channel_id:
            query['channel'] = channel_id
        page_data = self._download_json(
            'https://www.floatplane.com/api/v3/content/creator', display_id,
            query=query, note=f'Downloading page {page + 1}')
        for post in page_data or []:
            yield self.url_result(
                f'https://www.floatplane.com/post/{post["id"]}',
                FloatplaneIE, id=post['id'], title=post.get('title'),
                release_timestamp=parse_iso8601(post.get('releaseDate')))

    def _real_extract(self, url):
        """Return a lazily paged playlist of the creator's (or channel's) posts.

        Raises ExtractorError when the creator lookup returns no entry.
        """
        creator, channel = self._match_valid_url(url).group('id', 'channel')
        display_id = join_nonempty(creator, channel, delim='/')

        # Take the first entry of the lookup result, tolerating an empty or
        # unexpected response instead of failing with a bare IndexError
        creator_data = traverse_obj(self._download_json(
            'https://www.floatplane.com/api/v3/creator/named',
            display_id, query={'creatorURL[0]': creator}), (0, {dict}))
        if not creator_data:
            raise ExtractorError(f'Unable to find creator "{creator}"', expected=True)

        # Empty dict when the URL names no channel or the name matches none
        channel_data = traverse_obj(
            creator_data, ('channels', lambda _, v: v['urlname'] == channel), get_all=False) or {}

        return self.playlist_result(OnDemandPagedList(functools.partial(
            self._fetch_page, display_id, creator_data['id'], channel_data.get('id')), self._PAGE_SIZE),
            display_id, title=channel_data.get('title') or creator_data.get('title'),
            description=channel_data.get('about') or creator_data.get('about'))