jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/allstar.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / allstar.py
1 import functools
2 import json
3
4 from .common import InfoExtractor
5 from ..utils import (
6 ExtractorError,
7 OnDemandPagedList,
8 int_or_none,
9 join_nonempty,
10 parse_qs,
11 urljoin,
12 )
13 from ..utils.traversal import traverse_obj
14
15
# GraphQL selection requested for every clip and montage.
_FIELDS = '''
_id
clipImageSource
clipImageThumb
clipLink
clipTitle
createdDate
shareId
user { _id }
username
views'''

# Additional selection that only the clip queries request.
_EXTRA_FIELDS = '''
clipLength
clipSizeBytes'''

# Clip queries ask for the common selection followed by the clip-only one,
# separated by a single space.
_CLIP_FIELDS = f'{_FIELDS} {_EXTRA_FIELDS}'

# GraphQL documents by query name. 'clip' and 'montage' fetch a single video
# (aliased as `video`); 'Clips', 'Montages' and 'Mobile Clips' fetch one page
# of a user's videos (aliased as `videos`).
_QUERIES = {
    'clip': '''query ($id: String!) {
video: getClip(clipIdentifier: $id) {
%s
}
}''' % _CLIP_FIELDS,
    'montage': '''query ($id: String!) {
video: getMontage(clipIdentifier: $id) {
%s
}
}''' % _FIELDS,
    'Clips': '''query ($page: Int!, $user: String!, $game: Int) {
videos: clips(search: createdDate, page: $page, user: $user, mobile: false, game: $game) {
data { %s }
}
}''' % _CLIP_FIELDS,
    'Montages': '''query ($page: Int!, $user: String!) {
videos: montages(search: createdDate, page: $page, user: $user) {
data { %s }
}
}''' % _FIELDS,
    'Mobile Clips': '''query ($page: Int!, $user: String!) {
videos: clips(search: createdDate, page: $page, user: $user, mobile: true) {
data { %s }
}
}''' % _CLIP_FIELDS,
}
59
60
class AllstarBaseIE(InfoExtractor):
    """Common base for the allstar.gg extractors.

    Holds the GraphQL request helper and the conversion of one API video
    object into an info dict.
    """

    @staticmethod
    def _parse_video_data(video_data):
        """Build an info dict from a single clip/montage API object.

        Media paths are resolved against https://media.allstar.gg/.
        ``webpage_url`` is only added when both the id and the media URL were
        found. The result is always tagged with AllstarIE's extractor name
        and key, and gets an ``uploader_url`` derived from the uploader id.
        """
        to_media_url = functools.partial(urljoin, 'https://media.allstar.gg/')

        field_map = {
            'id': ('_id', {str}),
            'display_id': ('shareId', {str}),
            'title': ('clipTitle', {str}),
            'url': ('clipLink', {to_media_url}),
            'thumbnails': (('clipImageThumb', 'clipImageSource'), {'url': {to_media_url}}),
            'duration': ('clipLength', {int_or_none}),
            'filesize': ('clipSizeBytes', {int_or_none}),
            # createdDate is passed through int_or_none with scale=1000
            'timestamp': ('createdDate', {functools.partial(int_or_none, scale=1000)}),
            'uploader': ('username', {str}),
            'uploader_id': ('user', '_id', {str}),
            'view_count': ('views', {int_or_none}),
        }
        info = traverse_obj(video_data, field_map)

        video_id, media_url = info.get('id'), info.get('url')
        if video_id and media_url:
            # The media URL tells clips and montages apart
            kind = 'clip' if '/clips/' in media_url else 'montage'
            info['webpage_url'] = f'https://allstar.gg/{kind}?{kind}={video_id}'

        info['extractor_key'] = AllstarIE.ie_key()
        info['extractor'] = AllstarIE.IE_NAME
        info['uploader_url'] = urljoin('https://allstar.gg/u/', info.get('uploader_id'))

        return info

    def _call_api(self, query, variables, path, video_id=None, note=None):
        """Send a GraphQL query and return the part of the response at `path`.

        The query and its variables are sent as a JSON body to
        https://a1.allstar.gg/graphql.

        Raises ExtractorError with all error messages joined by '; ' when the
        response carries any in its `errors` list.
        """
        payload = json.dumps({'variables': variables, 'query': query}).encode()
        response = self._download_json(
            'https://a1.allstar.gg/graphql', video_id, note=note,
            headers={'content-type': 'application/json'}, data=payload)

        messages = traverse_obj(response, ('errors', ..., 'message', {str}))
        if messages:
            raise ExtractorError('; '.join(messages))

        return traverse_obj(response, path)
104
105
class AllstarIE(AllstarBaseIE):
    """Extractor for single allstar.gg clip and montage pages."""

    _VALID_URL = r'https?://(?:www\.)?allstar\.gg/(?P<type>(?:clip|montage))\?(?P=type)=(?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'https://allstar.gg/clip?clip=64482c2da9eec30008a67d1b',
        'info_dict': {
            'id': '64482c2da9eec30008a67d1b',
            'title': '4K on Inferno',
            'url': 'md5:66befb5381eef0c9456026386c25fa55',
            'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
            'uploader': 'chrk.',
            'ext': 'mp4',
            'duration': 20,
            'filesize': 21199257,
            'timestamp': 1682451501,
            'uploader_id': '62b8bdfc9021052f7905882d',
            'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
            'upload_date': '20230425',
            'view_count': int,
        },
    }, {
        'url': 'https://allstar.gg/clip?clip=8LJLY4JKB',
        'info_dict': {
            'id': '64a1ec6b887f4c0008dc50b8',
            'display_id': '8LJLY4JKB',
            'title': 'AK-47 3K on Mirage',
            'url': 'md5:dde224fd12f035c0e2529a4ae34c4283',
            'ext': 'mp4',
            'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
            'duration': 16,
            'filesize': 30175859,
            'timestamp': 1688333419,
            'uploader': 'cherokee',
            'uploader_id': '62b8bdfc9021052f7905882d',
            'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
            'upload_date': '20230702',
            'view_count': int,
        },
    }, {
        'url': 'https://allstar.gg/montage?montage=643e64089da7e9363e1fa66c',
        'info_dict': {
            'id': '643e64089da7e9363e1fa66c',
            'display_id': 'APQLGM2IMXW',
            'title': 'cherokee Rapid Fire Snipers Montage',
            'url': 'md5:a3ee356022115db2b27c81321d195945',
            'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
            'ext': 'mp4',
            'timestamp': 1681810448,
            'uploader': 'cherokee',
            'uploader_id': '62b8bdfc9021052f7905882d',
            'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
            'upload_date': '20230418',
            'view_count': int,
        },
    }, {
        'url': 'https://allstar.gg/montage?montage=RILJMH6QOS',
        'info_dict': {
            'id': '64a2697372ce3703de29e868',
            'display_id': 'RILJMH6QOS',
            'title': 'cherokee Rapid Fire Snipers Montage',
            'url': 'md5:d5672e6f88579730c2310a80fdbc4030',
            'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
            'ext': 'mp4',
            'timestamp': 1688365434,
            'uploader': 'cherokee',
            'uploader_id': '62b8bdfc9021052f7905882d',
            'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
            'upload_date': '20230703',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Fetch one clip or montage and return its info dict.

        The URL's type segment ('clip' or 'montage') is also the key of the
        GraphQL document looked up in _QUERIES; the identifier from the URL
        is passed to it as the `id` variable.
        """
        mobj = self._match_valid_url(url)
        query_id = mobj.group('type')
        video_id = mobj.group('id')

        video_data = self._call_api(
            _QUERIES.get(query_id), {'id': video_id}, ('data', 'video'), video_id)
        return self._parse_video_data(video_data)
184
185
class AllstarProfileIE(AllstarBaseIE):
    """Extractor for allstar.gg user profiles, returned as a paged playlist.

    The `view` URL parameter picks the listing ('Clips', 'Montages' or
    'Mobile Clips', defaulting to 'Clips'); an optional integer `game`
    parameter is forwarded to the API as the `game` variable.
    """

    _VALID_URL = r'https?://(?:www\.)?allstar\.gg/(?:profile\?user=|u/)(?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'https://allstar.gg/profile?user=62b8bdfc9021052f7905882d',
        'info_dict': {
            'id': '62b8bdfc9021052f7905882d-clips',
            'title': 'cherokee - Clips',
        },
        'playlist_mincount': 15,
    }, {
        'url': 'https://allstar.gg/u/cherokee?game=730&view=Clips',
        'info_dict': {
            'id': '62b8bdfc9021052f7905882d-clips-730',
            'title': 'cherokee - Clips - 730',
        },
        'playlist_mincount': 15,
    }, {
        'url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d?view=Montages',
        'info_dict': {
            'id': '62b8bdfc9021052f7905882d-montages',
            'title': 'cherokee - Montages',
        },
        'playlist_mincount': 4,
    }, {
        'url': 'https://allstar.gg/profile?user=cherokee&view=Mobile Clips',
        'info_dict': {
            'id': '62b8bdfc9021052f7905882d-mobile',
            'title': 'cherokee - Mobile Clips',
        },
        'playlist_mincount': 1,
    }]

    _PAGE_SIZE = 10

    def _get_page(self, user_id, display_id, game, query, page_num):
        """Yield the parsed entries of one listing page.

        `page_num` is incremented by one before it is sent to the API
        (presumably because the paged list hands in a 0-based index while
        the API counts pages from 1 - confirm against OnDemandPagedList).
        A response without any page data is treated as an empty page.
        """
        page_num += 1

        page = self._call_api(
            query, {
                'user': user_id,
                'page': page_num,
                'game': game,
            }, ('data', 'videos', 'data'), display_id, f'Downloading page {page_num}')

        # _call_api returns None when the requested path is absent or null in
        # the response; iterating that would raise TypeError, so fall back to
        # an empty page instead.
        for video_data in page or ():
            yield self._parse_video_data(video_data)

    def _real_extract(self, url):
        """Resolve the profile to a user id and return the selected listing.

        Raises ExtractorError when the user id cannot be found in the profile
        data or when the `view` parameter names an unsupported listing.
        """
        display_id = self._match_id(url)
        profile_data = self._download_json(
            urljoin('https://api.allstar.gg/v1/users/profile/', display_id), display_id)
        user_id = traverse_obj(profile_data, ('data', '_id', {str}))
        if not user_id:
            raise ExtractorError('Unable to extract the user id')

        username = traverse_obj(profile_data, ('data', 'profile', 'username', {str}))
        url_query = parse_qs(url)
        game = traverse_obj(url_query, ('game', 0, {int_or_none}))
        query_id = traverse_obj(url_query, ('view', 0), default='Clips')

        if query_id not in ('Clips', 'Montages', 'Mobile Clips'):
            raise ExtractorError(f'Unsupported playlist URL type {query_id!r}')

        return self.playlist_result(
            OnDemandPagedList(
                functools.partial(
                    self._get_page, user_id, display_id, game, _QUERIES.get(query_id)), self._PAGE_SIZE),
            # Only the first word of the view name goes into the id ('Mobile Clips' -> 'mobile')
            playlist_id=join_nonempty(user_id, query_id.lower().split()[0], game),
            playlist_title=join_nonempty((username or display_id), query_id, game, delim=' - '))