]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/vvvvid.py
[ie/vvvvid] Set user-agent to fix extraction (#8615)
[yt-dlp.git] / yt_dlp / extractor / vvvvid.py
1 import functools
2 import re
3
4 from .common import InfoExtractor
5 from .youtube import YoutubeIE
6 from ..utils import (
7 ExtractorError,
8 int_or_none,
9 str_or_none,
10 )
11
12
class VVVVIDIE(InfoExtractor):
    """Extractor for a single vvvvid.it video addressed by show, season and video id."""

    _VALID_URL_BASE = r'https?://(?:www\.)?vvvvid\.it/(?:#!)?(?:show|anime|film|series)/'
    _VALID_URL = r'%s(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)' % _VALID_URL_BASE
    _TESTS = [{
        # video_type == 'video/vvvvid'
        'url': 'https://www.vvvvid.it/show/498/the-power-of-computing/518/505692/playstation-vr-cambiera-il-nostro-modo-di-giocare',
        'info_dict': {
            'id': '505692',
            'ext': 'mp4',
            'title': 'Playstation VR cambierà il nostro modo di giocare',
            'duration': 93,
            'series': 'The Power of Computing',
            'season_id': '518',
            'episode': 'Playstation VR cambierà il nostro modo di giocare',
            'episode_number': None,
            'episode_id': '4747',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'thumbnail': 'https://static.vvvvid.it/img/zoomin/28CA2409-E663-34F0-2B02E72356556EA3_500k.jpg',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # video_type == 'video/rcs'
        'url': 'https://www.vvvvid.it/#!show/376/death-note-live-action/377/482493/episodio-01',
        'info_dict': {
            'id': '482493',
            'ext': 'mp4',
            'title': 'Episodio 01',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Every video/rcs is not working even in real website',
    }, {
        # video_type == 'video/youtube'
        'url': 'https://www.vvvvid.it/show/404/one-punch-man/406/486683/trailer',
        'md5': '33e0edfba720ad73a8782157fdebc648',
        'info_dict': {
            'id': 'RzmFKUDOUgw',
            'ext': 'mp4',
            'title': 'Trailer',
            'upload_date': '20150906',
            'description': 'md5:a5e802558d35247fee285875328c0b80',
            'uploader_id': '@EMOTIONLabelChannel',
            'uploader': 'EMOTION Label Channel',
            'episode_number': None,
            'episode_id': '3115',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'availability': str,
            'categories': list,
            'age_limit': 0,
            'channel': 'EMOTION Label Channel',
            'channel_follower_count': int,
            'channel_id': 'UCQ5URCSs1f5Cz9rh-cDGxNQ',
            'channel_url': 'https://www.youtube.com/channel/UCQ5URCSs1f5Cz9rh-cDGxNQ',
            'comment_count': int,
            'duration': 133,
            'episode': 'Trailer',
            'heatmap': list,
            'live_status': 'not_live',
            'playable_in_embed': True,
            'season_id': '406',
            'series': 'One-Punch Man',
            'tags': list,
            'uploader_url': 'https://www.youtube.com/@EMOTIONLabelChannel',
            'thumbnail': 'https://i.ytimg.com/vi/RzmFKUDOUgw/maxresdefault.jpg',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # video_type == 'video/dash'
        'url': 'https://www.vvvvid.it/show/844/le-bizzarre-avventure-di-jojo-vento-aureo/938/527551/golden-wind',
        'info_dict': {
            'id': '527551',
            'ext': 'mp4',
            'title': 'Golden Wind',
            'duration': 1430,
            'series': 'Le bizzarre avventure di Jojo - Vento Aureo',
            'season_id': '938',
            'episode': 'Golden Wind',
            'episode_number': 1,
            'episode_id': '9089',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'thumbnail': 'https://static.vvvvid.it/img/thumbs/Dynit/Jojo/Jojo_S05Ep01-t.jpg',
            'season': 'Season 5',
            'season_number': 5,
        },
        'params': {
            'skip_download': True,
            'format': 'mp4',
        },
    }, {
        'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048',
        'only_matching': True,
    }]
    # Filled in by _real_initialize() from the login endpoint and sent as the
    # `conn_id` query parameter on subsequent API requests.
    _conn_id = None

    @functools.cached_property
    def _headers(self):
        """Return the headers used for vvvvid.it API requests.

        Combines the geo-verification headers with a fixed browser User-Agent.
        """
        return {
            **self.geo_verification_headers(),
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.50 Safari/537.37',
        }

    def _real_initialize(self):
        """Request the login endpoint and remember the `conn_id` it returns."""
        self._conn_id = self._download_json(
            'https://www.vvvvid.it/user/login',
            None, headers=self._headers)['data']['conn_id']

    def _download_info(self, show_id, path, video_id, fatal=True, query=None):
        """Download an `ondemand/<show_id>/<path>` API document and return its `data` member.

        @param show_id   id of the show, used in the request path
        @param path      sub-path below the show (e.g. 'season/<id>', 'seasons/', 'info/')
        @param video_id  id passed through to _download_json
        @param fatal     passed through to _download_json; when false and nothing
                         usable was downloaded, None is returned instead
        @param query     extra query parameters merged on top of `conn_id`
        @raises ExtractorError  (expected) when the response has result == 'error'
        """
        q = {
            'conn_id': self._conn_id,
        }
        if query:
            q.update(query)
        response = self._download_json(
            f'https://www.vvvvid.it/vvvvid/ondemand/{show_id}/{path}',
            video_id, headers=self._headers, query=q, fatal=fatal)
        if not response and not fatal:
            return None
        if response.get('result') == 'error':
            message = response['message']
            raise ExtractorError(f'{self.IE_NAME} said: {message}', expected=True)
        return response['data']

    def _extract_common_video_info(self, video_data):
        """Return the info-dict fields shared by single videos and playlist entries."""
        return {
            'thumbnail': video_data.get('thumbnail'),
            'episode_id': str_or_none(video_data.get('id')),
        }

    def _real_extract(self, url):
        """Build the info dict for the video addressed by `url`."""
        show_id, season_id, video_id = self._match_valid_url(url).groups()

        response = self._download_info(
            show_id, f'season/{season_id}',
            video_id, query={'video_id': video_id})

        # The season listing carries numeric video ids, so compare as int.
        vid = int(video_id)
        video_data = next(
            (episode for episode in response or [] if episode.get('video_id') == vid), None)
        if video_data is None:
            # Give a meaningful error instead of an IndexError on an empty match list.
            raise ExtractorError(
                f'Unable to find video {video_id} in season {season_id} of show {show_id}')
        title = video_data['title']
        formats = []

        # vvvvid embed_info decryption algorithm is reverse engineered from function $ds(h) at vvvvid.js
        # (single-letter names are kept so the code can be compared with the JS original)
        def ds(h):
            # Custom alphabet: the position of a character is its 6-bit value.
            g = 'MNOPIJKL89+/4567UVWXQRSTEFGHABCDcdefYZabstuvopqr0123wxyzklmnghij'

            def f(m):
                # Repack groups of up to four 6-bit values into up to three bytes.
                l = []
                o = 0
                b = False  # set once the input ends in the middle of a group
                m_len = len(m)
                while ((not b) and o < m_len):
                    n = m[o] << 2
                    o += 1
                    k = -1  # -1 marks "byte not available"
                    j = -1
                    if o < m_len:
                        n += m[o] >> 4
                        o += 1
                        if o < m_len:
                            k = (m[o - 1] << 4) & 255
                            k += m[o] >> 2
                            o += 1
                            if o < m_len:
                                j = (m[o - 1] << 6) & 255
                                j += m[o]
                                o += 1
                            else:
                                b = True
                        else:
                            b = True
                    else:
                        b = True
                    l.append(n)
                    if k != -1:
                        l.append(k)
                    if j != -1:
                        l.append(j)
                return l

            c = [g.index(e) for e in h]

            # Two backwards passes over the values, XOR-ing each one with its
            # (cyclic) successor.
            c_len = len(c)
            for e in range(c_len * 2 - 1, -1, -1):
                c[e % c_len] ^= c[(e + 1) % c_len]

            return ''.join(map(chr, f(c)))

        info = {}

        def metadata_from_url(r_url):
            # Parse "_S<season>Ep<episode>" / "_Ep<episode>" out of a URL.
            # Does nothing once `info` already holds any key.
            if not info and r_url:
                mobj = re.search(r'_(?:S(\d+))?Ep(\d+)', r_url)
                if mobj:
                    info['episode_number'] = int(mobj.group(2))
                    season_number = mobj.group(1)
                    if season_number:
                        info['season_number'] = int(season_number)

        video_type = video_data.get('video_type')
        is_youtube = False
        for quality in ('', '_sd'):
            embed_code = video_data.get('embed_info' + quality)
            if not embed_code:
                continue
            embed_code = ds(embed_code)
            if video_type == 'video/kenc':
                # Switch the f4m manifest URL to its HLS counterpart.
                embed_code = re.sub(r'https?(://[^/]+)/z/', r'https\1/i/', embed_code).replace('/manifest.f4m', '/master.m3u8')
                # Same host as the other API calls, so send the same headers.
                kenc = self._download_json(
                    'https://www.vvvvid.it/kenc', video_id, headers=self._headers, query={
                        'action': 'kt',
                        'conn_id': self._conn_id,
                        'url': embed_code,
                    }, fatal=False) or {}
                kenc_message = kenc.get('message')
                if kenc_message:
                    # The decrypted message is appended as the query string.
                    embed_code += '?' + ds(kenc_message)
                formats.extend(self._extract_m3u8_formats(
                    embed_code, video_id, 'mp4', m3u8_id='hls', fatal=False))
            elif video_type == 'video/rcs':
                formats.extend(self._extract_akamai_formats(embed_code, video_id))
            elif video_type == 'video/youtube':
                # Delegate to the YouTube extractor; no formats of our own.
                info.update({
                    '_type': 'url_transparent',
                    'ie_key': YoutubeIE.ie_key(),
                    'url': embed_code,
                })
                is_youtube = True
                break
            elif video_type == 'video/dash':
                formats.extend(self._extract_m3u8_formats(
                    embed_code, video_id, 'mp4', m3u8_id='hls', fatal=False))
            else:
                formats.extend(self._extract_wowza_formats(
                    'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id, skip_protocols=['f4m']))
            metadata_from_url(embed_code)

        if not is_youtube:
            info['formats'] = formats

        metadata_from_url(video_data.get('thumbnail'))
        info.update(self._extract_common_video_info(video_data))
        info.update({
            'id': video_id,
            'title': title,
            'duration': int_or_none(video_data.get('length')),
            'series': video_data.get('show_title'),
            'season_id': season_id,
            'episode': title,
            'view_count': int_or_none(video_data.get('views')),
            'like_count': int_or_none(video_data.get('video_likes')),
            'repost_count': int_or_none(video_data.get('video_shares')),
        })
        return info
285
286
class VVVVIDShowIE(VVVVIDIE):  # XXX: Do not subclass from concrete IE
    """Extractor for a whole vvvvid.it show, returned as a playlist of its episodes."""

    _VALID_URL = r'(?P<base_url>%s(?P<id>\d+)(?:/(?P<show_title>[^/?&#]+))?)/?(?:[?#&]|$)' % VVVVIDIE._VALID_URL_BASE
    _TESTS = [{
        'url': 'https://www.vvvvid.it/show/156/psyco-pass',
        'info_dict': {
            'id': '156',
            'title': 'Psycho-Pass',
            'description': 'md5:94d572c0bd85894b193b8aebc9a3a806',
        },
        'playlist_count': 46,
    }, {
        'url': 'https://www.vvvvid.it/show/156',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist with one url_transparent entry per playable episode."""
        base_url, show_id, show_title = self._match_valid_url(url).groups()

        seasons = self._download_info(
            show_id, 'seasons/', show_title)

        # Non-fatal request: _download_info() may return None here, so fall
        # back to an empty dict to keep the .get() lookups below safe.
        show_info = self._download_info(
            show_id, 'info/', show_title, fatal=False) or {}

        if not show_title:
            # VVVVIDIE._VALID_URL requires a path segment between the show id
            # and the season id, so add a placeholder when the URL had none.
            base_url += '/title'

        entries = []
        for season in (seasons or []):
            episodes = season.get('episodes') or []
            playlist_title = season.get('name') or show_info.get('title')
            for episode in episodes:
                # Only skip episodes explicitly flagged as not playable.
                if episode.get('playable') is False:
                    continue
                season_id = str_or_none(episode.get('season_id'))
                video_id = str_or_none(episode.get('video_id'))
                if not (season_id and video_id):
                    continue
                info = self._extract_common_video_info(episode)
                info.update({
                    '_type': 'url_transparent',
                    'ie_key': VVVVIDIE.ie_key(),
                    'url': '/'.join([base_url, season_id, video_id]),
                    'title': episode.get('title'),
                    'description': episode.get('description'),
                    'season_id': season_id,
                    'playlist_title': playlist_title,
                })
                entries.append(info)

        return self.playlist_result(
            entries, show_id, show_info.get('title'), show_info.get('description'))