]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/boosty.py
[extractor/goplay] Fix extractor (#6654)
[yt-dlp.git] / yt_dlp / extractor / boosty.py
1 from .common import InfoExtractor
2 from .youtube import YoutubeIE
3 from ..utils import (
4 ExtractorError,
5 int_or_none,
6 qualities,
7 str_or_none,
8 url_or_none,
9 )
10 from ..utils.traversal import traverse_obj
11
12
class BoostyIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?boosty\.to/(?P<user>[^/#?]+)/posts/(?P<post_id>[^/#?]+)'
    _TESTS = [{
        # single ok_video
        'url': 'https://boosty.to/kuplinov/posts/e55d050c-e3bb-4873-a7db-ac7a49b40c38',
        'info_dict': {
            'id': 'd7473824-352e-48e2-ae53-d4aa39459968',
            'title': 'phasma_3',
            'channel': 'Kuplinov',
            'channel_id': '7958701',
            'timestamp': 1655031975,
            'upload_date': '20220612',
            'release_timestamp': 1655049000,
            'release_date': '20220612',
            'modified_timestamp': 1668680993,
            'modified_date': '20221117',
            'tags': ['куплинов', 'phasmophobia'],
            'like_count': int,
            'ext': 'mp4',
            'duration': 105,
            'view_count': int,
            'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
        },
    }, {
        # multiple ok_video
        'url': 'https://boosty.to/maddyson/posts/0c652798-3b35-471f-8b48-a76a0b28736f',
        'info_dict': {
            'id': '0c652798-3b35-471f-8b48-a76a0b28736f',
            'title': 'то что не пропустил юта6',
            'channel': 'Илья Давыдов',
            'channel_id': '6808257',
            'timestamp': 1694017040,
            'upload_date': '20230906',
            'release_timestamp': 1694017040,
            'release_date': '20230906',
            'modified_timestamp': 1694071178,
            'modified_date': '20230907',
            'like_count': int,
        },
        'playlist_count': 3,
        'playlist': [{
            'info_dict': {
                'id': 'cc325a9f-a563-41c6-bf47-516c1b506c9a',
                'title': 'то что не пропустил юта6',
                'channel': 'Илья Давыдов',
                'channel_id': '6808257',
                'timestamp': 1694017040,
                'upload_date': '20230906',
                'release_timestamp': 1694017040,
                'release_date': '20230906',
                'modified_timestamp': 1694071178,
                'modified_date': '20230907',
                'like_count': int,
                'ext': 'mp4',
                'duration': 31204,
                'view_count': int,
                'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
            },
        }, {
            'info_dict': {
                'id': 'd07b0a72-9493-4512-b54e-55ce468fd4b7',
                'title': 'то что не пропустил юта6',
                'channel': 'Илья Давыдов',
                'channel_id': '6808257',
                'timestamp': 1694017040,
                'upload_date': '20230906',
                'release_timestamp': 1694017040,
                'release_date': '20230906',
                'modified_timestamp': 1694071178,
                'modified_date': '20230907',
                'like_count': int,
                'ext': 'mp4',
                'duration': 25704,
                'view_count': int,
                'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
            },
        }, {
            'info_dict': {
                'id': '4a3bba32-78c8-422a-9432-2791aff60b42',
                'title': 'то что не пропустил юта6',
                'channel': 'Илья Давыдов',
                'channel_id': '6808257',
                'timestamp': 1694017040,
                'upload_date': '20230906',
                'release_timestamp': 1694017040,
                'release_date': '20230906',
                'modified_timestamp': 1694071178,
                'modified_date': '20230907',
                'like_count': int,
                'ext': 'mp4',
                'duration': 31867,
                'view_count': int,
                'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
            },
        }],
    }, {
        # single external video (youtube)
        'url': 'https://boosty.to/denischuzhoy/posts/6094a487-bcec-4cf8-a453-43313b463c38',
        'info_dict': {
            'id': 'EXelTnve5lY',
            'title': 'Послание Президента Федеральному Собранию | Класс народа',
            'upload_date': '20210425',
            'channel': 'Денис Чужой',
            'tags': 'count:10',
            'like_count': int,
            'ext': 'mp4',
            'duration': 816,
            'view_count': int,
            'thumbnail': r're:^https://i\.ytimg\.com/',
            'age_limit': 0,
            'availability': 'public',
            'categories': list,
            'channel_follower_count': int,
            'channel_id': 'UCCzVNbWZfYpBfyofCCUD_0w',
            'channel_is_verified': bool,
            'channel_url': r're:^https://www\.youtube\.com/',
            'comment_count': int,
            'description': str,
            'heatmap': 'count:100',
            'live_status': str,
            'playable_in_embed': bool,
            'uploader': str,
            'uploader_id': str,
            'uploader_url': r're:^https://www\.youtube\.com/',
        },
    }]

    # Progressive MP4 variant names reported in `playerUrls[].type`.
    # NOTE(review): presumably ordered worst-to-best, since the tuple is handed
    # to `qualities()` for ranking - confirm against the helper.
    _MP4_TYPES = ('tiny', 'lowest', 'low', 'medium', 'high', 'full_hd', 'quad_hd', 'ultra_hd')

    def _extract_formats(self, player_urls, video_id):
        """Turn a post item's `playerUrls` list into a list of format dicts.

        Only entries whose 'url' passes `url_or_none` are considered. HLS and
        DASH manifests are expanded non-fatally, known MP4 variants become
        direct formats, and any other 'type' only triggers a warning.
        """
        hls_types = ('hls', 'hls_live', 'live_ondemand_hls', 'live_playback_hls')
        dash_types = ('dash', 'dash_live', 'live_playback_dash')
        rank = qualities(self._MP4_TYPES)

        collected = []
        for source in traverse_obj(player_urls, lambda _, v: url_or_none(v['url'])):
            media_url = source['url']
            kind = source.get('type')

            if kind in hls_types:
                collected.extend(self._extract_m3u8_formats(
                    media_url, video_id, m3u8_id='hls', fatal=False))
                continue

            if kind in dash_types:
                collected.extend(self._extract_mpd_formats(
                    media_url, video_id, mpd_id='dash', fatal=False))
                continue

            if kind in self._MP4_TYPES:
                collected.append({
                    'url': media_url,
                    'ext': 'mp4',
                    'format_id': kind,
                    'quality': rank(kind),
                })
                continue

            self.report_warning(f'Unknown format type: {kind!r}')

        return collected

    def _real_extract(self, url):
        """Extract the video(s) attached to a single Boosty post.

        Returns the lone entry when exactly one video is found, otherwise a
        playlist identified by the post id. Raises ExtractorError (expected)
        when the post data yields no supported video item.
        """
        mobj = self._match_valid_url(url)
        user, post_id = mobj.group('user'), mobj.group('post_id')

        api_url = f'https://api.boosty.to/v1/blog/{user}/post/{post_id}'
        post = self._download_json(
            api_url, post_id,
            note='Downloading post data', errnote='Unable to download post data')

        post_title = post.get('title')
        if not post_title:
            # The API response carried no usable title; read it off the post page instead
            self.report_warning('Unable to extract post title. Falling back to parsing html page')
            webpage = self._download_webpage(url, video_id=post_id)
            post_title = self._og_search_title(webpage, default=None)
            if not post_title:
                post_title = self._html_extract_title(webpage)

        # Post-level fields shared by every entry and by the playlist itself
        post_fields = traverse_obj(post, {
            'channel': ('user', 'name', {str}),
            'channel_id': ('user', 'id', {str_or_none}),
            'timestamp': ('createdAt', {int_or_none}),
            'release_timestamp': ('publishTime', {int_or_none}),
            'modified_timestamp': ('updatedAt', {int_or_none}),
            'tags': ('tags', ..., 'title', {str}),
            'like_count': ('count', 'likes', {int_or_none}),
        })
        common_metadata = {'title': post_title, **post_fields}

        entries = []
        for item in traverse_obj(post, ('data', ..., {dict})):
            kind = item.get('type')

            if kind == 'video':
                # Externally hosted video: hand the URL over to the YouTube extractor
                if url_or_none(item.get('url')):
                    entries.append(self.url_result(item['url'], YoutubeIE))
                continue

            if kind != 'ok_video':
                continue

            video_id = item.get('id') or post_id
            entry = {
                'id': video_id,
                'formats': self._extract_formats(item.get('playerUrls'), video_id),
                **common_metadata,
            }
            # Item-level values (e.g. its own title) take precedence over the post-level ones
            entry.update(traverse_obj(item, {
                'title': ('title', {str}),
                'duration': ('duration', {int_or_none}),
                'view_count': ('viewsCounter', {int_or_none}),
                'thumbnail': (('previewUrl', 'defaultPreview'), {url_or_none}),
            }, get_all=False))
            entries.append(entry)

        if not entries:
            raise ExtractorError('No videos found', expected=True)
        if len(entries) > 1:
            return self.playlist_result(entries, post_id, post_title, **common_metadata)
        return entries[0]