yt_dlp/extractor/floatplane.py

   1 import functools
   2
   3 from .common import InfoExtractor
   4 from ..utils import (
   5     ExtractorError,
   6     OnDemandPagedList,
   7     clean_html,
   8     determine_ext,
   9     format_field,
  10     int_or_none,
  11     join_nonempty,
  12     parse_codecs,
  13     parse_iso8601,
  14     urljoin,
  15 )
  16 from ..utils.traversal import traverse_obj
  17
  18
  19 class FloatplaneIE(InfoExtractor):
  20     _VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/post/(?P<id>\w+)'
  21     _TESTS = [{
  22         'url': 'https://www.floatplane.com/post/2Yf3UedF7C',
  23         'info_dict': {
  24             'id': 'yuleLogLTT',
  25             'ext': 'mp4',
  26             'display_id': '2Yf3UedF7C',
  27             'title': '8K Yule Log Fireplace with Crackling Fire Sounds - 10 Hours',
  28             'description': 'md5:adf2970e0de1c5e3df447818bb0309f6',
  29             'thumbnail': r're:^https?://.*\.jpe?g$',
  30             'duration': 36035,
  31             'comment_count': int,
  32             'like_count': int,
  33             'dislike_count': int,
  34             'release_date': '20191206',
  35             'release_timestamp': 1575657000,
  36             'uploader': 'LinusTechTips',
  37             'uploader_id': '59f94c0bdd241b70349eb72b',
  38             'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home',
  39             'channel': 'Linus Tech Tips',
  40             'channel_id': '63fe42c309e691e4e36de93d',
  41             'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/main',
  42             'availability': 'subscriber_only',
  43         },
  44         'params': {'skip_download': 'm3u8'},
  45     }, {
  46         'url': 'https://www.floatplane.com/post/j2jqG3JmgJ',
  47         'info_dict': {
  48             'id': 'j2jqG3JmgJ',
  49             'title': 'TJM: Does Anyone Care About Avatar: The Way of Water?',
  50             'description': 'md5:00bf17dc5733e4031e99b7fd6489f274',
  51             'thumbnail': r're:^https?://.*\.jpe?g$',
  52             'comment_count': int,
  53             'like_count': int,
  54             'dislike_count': int,
  55             'release_timestamp': 1671915900,
  56             'release_date': '20221224',
  57             'uploader': 'LinusTechTips',
  58             'uploader_id': '59f94c0bdd241b70349eb72b',
  59             'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home',
  60             'channel': "They're Just Movies",
  61             'channel_id': '64135f82fc76ab7f9fbdc876',
  62             'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/tajm',
  63             'availability': 'subscriber_only',
  64         },
  65         'playlist_count': 2,
  66     }, {
  67         'url': 'https://www.floatplane.com/post/3tK2tInhoN',
  68         'info_dict': {
  69             'id': '3tK2tInhoN',
  70             'title': 'Extras - How Linus Communicates with Editors (Compensator 4)',
  71             'description': 'md5:83cd40aae1ce124df33769600c80ca5b',
  72             'thumbnail': r're:^https?://.*\.jpe?g$',
  73             'comment_count': int,
  74             'like_count': int,
  75             'dislike_count': int,
  76             'release_timestamp': 1700529120,
  77             'release_date': '20231121',
  78             'uploader': 'LinusTechTips',
  79             'uploader_id': '59f94c0bdd241b70349eb72b',
  80             'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home',
  81             'channel': 'FP Exclusives',
  82             'channel_id': '6413623f5b12cca228a28e78',
  83             'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/fpexclusive',
  84             'availability': 'subscriber_only',
  85         },
  86         'playlist_count': 2,
  87     }, {
  88         'url': 'https://beta.floatplane.com/post/d870PEFXS1',
  89         'info_dict': {
  90             'id': 'bg9SuYKEww',
  91             'ext': 'mp4',
  92             'display_id': 'd870PEFXS1',
  93             'title': 'LCS Drama, TLOU 2 Remaster, Destiny 2 Player Count Drops, + More!',
  94             'description': 'md5:80d612dcabf41b17487afcbe303ec57d',
  95             'thumbnail': r're:^https?://.*\.jpe?g$',
  96             'release_timestamp': 1700622000,
  97             'release_date': '20231122',
  98             'duration': 513,
  99             'like_count': int,
 100             'dislike_count': int,
 101             'comment_count': int,
 102             'uploader': 'LinusTechTips',
 103             'uploader_id': '59f94c0bdd241b70349eb72b',
 104             'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home',
 105             'channel': 'GameLinked',
 106             'channel_id': '649dbade3540dbc3945eeda7',
 107             'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/gamelinked',
 108             'availability': 'subscriber_only',
 109         },
 110         'params': {'skip_download': 'm3u8'},
 111     }]
 112
 113     def _real_initialize(self):
 114         if not self._get_cookies('https://www.floatplane.com').get('sails.sid'):
 115             self.raise_login_required()
 116
 117     def _real_extract(self, url):
 118         post_id = self._match_id(url)
 119
 120         post_data = self._download_json(
 121             'https://www.floatplane.com/api/v3/content/post', post_id, query={'id': post_id},
 122             note='Downloading post data', errnote='Unable to download post data')
 123
 124         if not any(traverse_obj(post_data, ('metadata', ('hasVideo', 'hasAudio')))):
 125             raise ExtractorError('Post does not contain a video or audio track', expected=True)
 126
 127         items = []
 128         for media in traverse_obj(post_data, (('videoAttachments', 'audioAttachments'), ...)):
 129             media_id = media['id']
 130             media_typ = media.get('type') or 'video'
 131
 132             metadata = self._download_json(
 133                 f'https://www.floatplane.com/api/v3/content/{media_typ}', media_id, query={'id': media_id},
 134                 note=f'Downloading {media_typ} metadata')
 135
 136             stream = self._download_json(
 137                 'https://www.floatplane.com/api/v2/cdn/delivery', media_id, query={
 138                     'type': 'vod' if media_typ == 'video' else 'aod',
 139                     'guid': metadata['guid']
 140                 }, note=f'Downloading {media_typ} stream data')
 141
 142             path_template = traverse_obj(stream, ('resource', 'uri', {str}))
 143
 144             def format_path(params):
 145                 path = path_template
 146                 for i, val in (params or {}).items():
 147                     path = path.replace(f'{{qualityLevelParams.{i}}}', val)
 148                 return path
 149
 150             formats = []
 151             for quality in traverse_obj(stream, ('resource', 'data', 'qualityLevels', ...)):
 152                 url = urljoin(stream['cdn'], format_path(traverse_obj(
 153                     stream, ('resource', 'data', 'qualityLevelParams', quality['name']))))
 154                 formats.append({
 155                     **traverse_obj(quality, {
 156                         'format_id': 'name',
 157                         'format_note': 'label',
 158                         'width': ('width', {int}),
 159                         'height': ('height', {int}),
 160                     }),
 161                     **parse_codecs(quality.get('codecs')),
 162                     'url': url,
 163                     'ext': determine_ext(url.partition('/chunk.m3u8')[0], 'mp4'),
 164                 })
 165
 166             items.append({
 167                 'id': media_id,
 168                 **traverse_obj(metadata, {
 169                     'title': 'title',
 170                     'duration': ('duration', {int_or_none}),
 171                     'thumbnail': ('thumbnail', 'path'),
 172                 }),
 173                 'formats': formats,
 174             })
 175
 176         uploader_url = format_field(
 177             post_data, [('creator', 'urlname')], 'https://www.floatplane.com/channel/%s/home') or None
 178         channel_url = urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname')))
 179
 180         post_info = {
 181             'id': post_id,
 182             'display_id': post_id,
 183             **traverse_obj(post_data, {
 184                 'title': 'title',
 185                 'description': ('text', {clean_html}),
 186                 'uploader': ('creator', 'title'),
 187                 'uploader_id': ('creator', 'id'),
 188                 'channel': ('channel', 'title'),
 189                 'channel_id': ('channel', 'id'),
 190                 'like_count': ('likes', {int_or_none}),
 191                 'dislike_count': ('dislikes', {int_or_none}),
 192                 'comment_count': ('comments', {int_or_none}),
 193                 'release_timestamp': ('releaseDate', {parse_iso8601}),
 194                 'thumbnail': ('thumbnail', 'path'),
 195             }),
 196             'uploader_url': uploader_url,
 197             'channel_url': channel_url,
 198             'availability': self._availability(needs_subscription=True),
 199         }
 200
 201         if len(items) > 1:
 202             return self.playlist_result(items, **post_info)
 203
 204         post_info.update(items[0])
 205         return post_info
 206
 207
 208 class FloatplaneChannelIE(InfoExtractor):
 209     _VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/channel/(?P<id>[\w-]+)/home(?:/(?P<channel>[\w-]+))?'
 210     _PAGE_SIZE = 20
 211     _TESTS = [{
 212         'url': 'https://www.floatplane.com/channel/linustechtips/home/ltxexpo',
 213         'info_dict': {
 214             'id': 'linustechtips/ltxexpo',
 215             'title': 'LTX Expo',
 216             'description': 'md5:9819002f9ebe7fd7c75a3a1d38a59149',
 217         },
 218         'playlist_mincount': 51,
 219     }, {
 220         'url': 'https://www.floatplane.com/channel/ShankMods/home',
 221         'info_dict': {
 222             'id': 'ShankMods',
 223             'title': 'Shank Mods',
 224             'description': 'md5:6dff1bb07cad8e5448e04daad9be1b30',
 225         },
 226         'playlist_mincount': 14,
 227     }, {
 228         'url': 'https://beta.floatplane.com/channel/bitwit_ultra/home',
 229         'info_dict': {
 230             'id': 'bitwit_ultra',
 231             'title': 'Bitwit Ultra',
 232             'description': 'md5:1452f280bb45962976d4789200f676dd',
 233         },
 234         'playlist_mincount': 200,
 235     }]
 236
 237     def _fetch_page(self, display_id, creator_id, channel_id, page):
 238         query = {
 239             'id': creator_id,
 240             'limit': self._PAGE_SIZE,
 241             'fetchAfter': page * self._PAGE_SIZE,
 242         }
 243         if channel_id:
 244             query['channel'] = channel_id
 245         page_data = self._download_json(
 246             'https://www.floatplane.com/api/v3/content/creator', display_id,
 247             query=query, note=f'Downloading page {page + 1}')
 248         for post in page_data or []:
 249             yield self.url_result(
 250                 f'https://www.floatplane.com/post/{post["id"]}',
 251                 FloatplaneIE, id=post['id'], title=post.get('title'),
 252                 release_timestamp=parse_iso8601(post.get('releaseDate')))
 253
 254     def _real_extract(self, url):
 255         creator, channel = self._match_valid_url(url).group('id', 'channel')
 256         display_id = join_nonempty(creator, channel, delim='/')
 257
 258         creator_data = self._download_json(
 259             'https://www.floatplane.com/api/v3/creator/named',
 260             display_id, query={'creatorURL[0]': creator})[0]
 261
 262         channel_data = traverse_obj(
 263             creator_data, ('channels', lambda _, v: v['urlname'] == channel), get_all=False) or {}
 264
 265         return self.playlist_result(OnDemandPagedList(functools.partial(
 266             self._fetch_page, display_id, creator_data['id'], channel_data.get('id')), self._PAGE_SIZE),
 267             display_id, title=channel_data.get('title') or creator_data.get('title'),
 268             description=channel_data.get('about') or creator_data.get('about'))