]>
Commit | Line | Data |
---|---|---|
6f4e4132 YCH |
1 | import time |
2 | import hashlib | |
f14c2333 | 3 | import urllib |
21f40e75 | 4 | import uuid |
6f4e4132 | 5 | |
a172d962 | 6 | from .common import InfoExtractor |
21f40e75 | 7 | from .openload import PhantomJSwrapper |
3b4b82d4 YCH |
8 | from ..utils import ( |
9 | ExtractorError, | |
21f40e75 | 10 | UserNotLive, |
11 | determine_ext, | |
12 | int_or_none, | |
13 | js_to_json, | |
14 | parse_resolution, | |
15 | str_or_none, | |
16 | traverse_obj, | |
3b4b82d4 | 17 | unescapeHTML, |
21f40e75 | 18 | url_or_none, |
19 | urlencode_postdata, | |
7274f3d0 | 20 | urljoin, |
3b4b82d4 | 21 | ) |
a172d962 | 22 | |
a172d962 | 23 | |
class DouyuBaseIE(InfoExtractor):
    # Shared signing helpers for the Douyu extractors: obtain the CryptoJS MD5
    # rollup, locate the page's signing function and run it through PhantomJS.

    def _download_cryptojs_md5(self, video_id):
        """Download the CryptoJS MD5 rollup, cache it and return its source.

        The mirrors are tried in order with non-fatal downloads; the first
        non-empty response is stored in the 'douyu' cache section under the
        key 'crypto-js-md5' and returned.

        Raises ExtractorError when none of the mirrors yields any content.
        """
        for url in [
            # XXX: Do NOT use cdn.bootcdn.net here. The downloaded script is executed
            # as-is, and that CDN was implicated in the polyfill.io supply-chain attack;
            # ref: https://sansec.io/research/polyfill-supply-chain-attack
            'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
            'https://unpkg.com/cryptojslib@3.1.2/rollups/md5.js',
        ]:
            js_code = self._download_webpage(
                url, video_id, note='Downloading signing dependency', fatal=False)
            if js_code:
                self.cache.store('douyu', 'crypto-js-md5', js_code)
                return js_code
        raise ExtractorError('Unable to download JS dependency (crypto-js/md5)')

    def _get_cryptojs_md5(self, video_id):
        """Return the cached CryptoJS MD5 source, downloading it on a cache miss."""
        return self.cache.load('douyu', 'crypto-js-md5') or self._download_cryptojs_md5(video_id)

    def _calc_sign(self, sign_func, video_id, a):
        """Run the site's signing function and return the signed parameters as a dict.

        sign_func is JavaScript source that defines ub98484234; it is called
        with `a`, a freshly generated UUID hex string and the current Unix
        time rounded to whole seconds. The script's console output is parsed
        as a query string, keeping only the first value of every key.
        """
        b = uuid.uuid4().hex
        c = round(time.time())
        js_script = f'{self._get_cryptojs_md5(video_id)};{sign_func};console.log(ub98484234("{a}","{b}","{c}"))'
        phantom = PhantomJSwrapper(self)
        result = phantom.execute(js_script, video_id,
                                 note='Executing JS signing script').strip()
        return {i: v[0] for i, v in urllib.parse.parse_qs(result).items()}

    def _search_js_sign_func(self, webpage, fatal=True):
        """Return the body of the <script> tag that mentions ub98484234.

        `fatal` is forwarded unchanged to _search_regex.
        """
        # The greedy optional prefix (not a look-behind) swallows earlier <script tags,
        # which ensures the last possible script tag is matched
        return self._search_regex(
            r'(?:<script.*)?<script[^>]*>(.*?ub98484234.*?)</script>', webpage, 'JS sign func', fatal=fatal)
53 | ||
54 | ||
class DouyuTVIE(DouyuBaseIE):
    # Extractor for Douyu live rooms (douyu.com / douyutv.com).
    IE_DESC = '斗鱼直播'
    _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(topic/\w+\?rid=|(?:[^/]+/))*(?P<id>[A-Za-z0-9]+)'
    _TESTS = [{
        'url': 'https://www.douyu.com/pigff',
        'info_dict': {
            'id': '24422',
            'display_id': 'pigff',
            'ext': 'mp4',
            'title': 're:^【PIGFF】.* [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': r'≥15级牌子看鱼吧置顶帖进粉丝vx群',
            'thumbnail': str,
            'uploader': 'pigff',
            'is_live': True,
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.douyutv.com/85982',
        'info_dict': {
            'id': '85982',
            'display_id': '85982',
            'ext': 'flv',
            'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
            'thumbnail': r're:^https?://.*\.png',
            'uploader': 'douyu小漠',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Room not found',
    }, {
        'url': 'http://www.douyutv.com/17732',
        'info_dict': {
            'id': '17732',
            'display_id': '17732',
            'ext': 'flv',
            'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': r're:.*m7show@163\.com.*',
            'thumbnail': r're:^https?://.*\.png',
            'uploader': '7师傅',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.douyu.com/topic/ydxc?rid=6560603',
        'info_dict': {
            'id': '6560603',
            'display_id': '6560603',
            'ext': 'flv',
            'title': 're:^阿余:新年快乐恭喜发财! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': 're:.*直播时间.*',
            'thumbnail': r're:^https?://.*\.png',
            'uploader': '阿涛皎月Carry',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.douyu.com/xiaocang',
        'only_matching': True,
    }, {
        # \"room_id\"
        'url': 'http://www.douyu.com/t/lpl',
        'only_matching': True,
    }]

    def _get_sign_func(self, room_id, video_id):
        """Fetch the JS signing function for room_id from the homeH5Enc endpoint."""
        enc_response = self._download_json(
            f'https://www.douyu.com/swf_api/homeH5Enc?rids={room_id}', video_id,
            note='Getting signing script')
        return enc_response['data'][f'room{room_id}']

    def _extract_stream_formats(self, stream_formats):
        """Turn a list of getH5Play responses into a list of format dicts.

        Responses whose 'data' entry yields no usable stream URL are skipped.
        """
        formats = []
        for play_data in traverse_obj(stream_formats, (..., 'data')):
            stream_url = urljoin(
                traverse_obj(play_data, 'rtmp_url'), traverse_obj(play_data, 'rtmp_live'))
            if not stream_url:
                continue

            rate_id = traverse_obj(play_data, ('rate', {int_or_none}))
            # Entry of 'multirates' describing the rate this response was served at
            rate_info = traverse_obj(
                play_data, ('multirates', lambda _, v: v['rate'] == rate_id), get_all=False)
            ext = determine_ext(stream_url)
            is_hls = ext == 'm3u8'

            fmt = {
                'url': stream_url,
                'format_id': str_or_none(rate_id),
                'ext': 'mp4' if is_hls else ext,
                'protocol': 'm3u8_native' if is_hls else 'https',
                # Maps rate 0 to quality 0 and positive rates to (rate - 10000)
                'quality': None if rate_id is None else rate_id % -10000,
            }
            fmt.update(traverse_obj(rate_info, {
                'format': ('name', {str_or_none}),
                'tbr': ('bit', {int_or_none}),
            }))
            formats.append(fmt)
        return formats

    def _real_extract(self, url):
        """Extract the live stream formats and room metadata for a Douyu room URL.

        Raises UserNotLive when the page or the room API reports that the
        channel is looping VODs or is offline.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        room_id = self._search_regex(r'\$ROOM\.room_id\s*=\s*(\d+)', webpage, 'room id')

        loop_flag = self._search_regex(r'"videoLoop"\s*:\s*(\d+)', webpage, 'loop', default='')
        if loop_flag == '1':
            raise UserNotLive('The channel is auto-playing VODs', video_id=video_id)
        page_status = self._search_regex(r'\$ROOM\.show_status\s*=\s*(\d+)', webpage, 'status', default='')
        if page_status == '2':
            raise UserNotLive(video_id=video_id)

        # Grab metadata from API
        params = {
            'aid': 'wp',
            'client_sys': 'wp',
            'time': int(time.time()),
        }
        auth_source = f'room/{room_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'
        params['auth'] = hashlib.md5(auth_source.encode()).hexdigest()
        room_response = self._download_json(
            f'http://www.douyutv.com/api/v1/room/{room_id}', video_id,
            note='Downloading room info', query=params, fatal=False)
        room = traverse_obj(room_response, 'data')

        # 1 = live, 2 = offline
        if traverse_obj(room, 'show_status') == '2':
            raise UserNotLive(video_id=video_id)

        # Prefer the signing function embedded in the page; fall back to the API
        js_sign_func = self._search_js_sign_func(webpage, fatal=False) or self._get_sign_func(room_id, video_id)
        form_data = {'rate': 0}
        form_data.update(self._calc_sign(js_sign_func, video_id, room_id))

        play_api = f'https://www.douyu.com/lapi/live/getH5Play/{room_id}'
        default_stream = self._download_json(
            play_api, video_id, note="Downloading livestream format",
            data=urlencode_postdata(form_data))
        stream_formats = [default_stream]

        # Request every other advertised rate; the default one is already fetched
        default_rate = traverse_obj(default_stream, ('data', 'rate'))
        for rate_id in traverse_obj(default_stream, ('data', 'multirates', ..., 'rate')):
            if rate_id == default_rate:
                continue
            form_data['rate'] = rate_id
            stream_formats.append(self._download_json(
                play_api, video_id, note=f'Downloading livestream format {rate_id}',
                data=urlencode_postdata(form_data)))

        info = {
            'id': room_id,
            'formats': self._extract_stream_formats(stream_formats),
            'is_live': True,
        }
        info.update(traverse_obj(room, {
            # 'url' has its first character dropped to form the display id
            'display_id': ('url', {str}, {lambda i: i[1:]}),
            'title': ('room_name', {unescapeHTML}),
            'description': ('show_details', {str}),
            'uploader': ('nickname', {str}),
            'thumbnail': ('room_src', {url_or_none}),
        }))
        return info
7274f3d0 YCH |
213 | |
214 | ||
class DouyuShowIE(DouyuBaseIE):
    # Extractor for Douyu recorded videos (v.douyu.com/show/<id>).
    _VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)'

    _TESTS = [{
        'url': 'https://v.douyu.com/show/mPyq7oVNe5Yv1gLY',
        'info_dict': {
            'id': 'mPyq7oVNe5Yv1gLY',
            'ext': 'mp4',
            'title': '四川人小时候的味道“蒜苗回锅肉”,传统菜不能丢,要常做来吃',
            'duration': 633,
            'thumbnail': str,
            'uploader': '美食作家王刚V',
            'uploader_id': 'OVAO4NVx1m7Q',
            'timestamp': 1661850002,
            'upload_date': '20220830',
            'view_count': int,
            'tags': ['美食', '美食综合'],
        },
    }, {
        'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw',
        'only_matching': True,
    }]

    # Display label for each stream name returned by the API
    _FORMATS = {
        'super': '原画',
        'high': '超清',
        'normal': '高清',
    }

    # Relative quality ranking per stream name (higher is better)
    _QUALITIES = {
        'super': -1,
        'high': -2,
        'normal': -3,
    }

    # Resolution string per stream name, parsed with parse_resolution
    _RESOLUTIONS = {
        'super': '1920x1080',
        'high': '1280x720',
        'normal': '852x480',
    }

    def _real_extract(self, url):
        """Extract formats and metadata for a Douyu show page.

        Stream entries without a usable URL are reported on screen together
        with the login hint and skipped.
        """
        # Mobile URLs are rewritten to the desktop host before downloading
        url = url.replace('vmobile.', 'v.')
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        video_info = self._search_json(
            r'<script>\s*window\.\$DATA\s*=', webpage,
            'video info', video_id, transform_source=js_to_json)

        js_sign_func = self._search_js_sign_func(webpage)
        form_data = {'vid': video_id}
        form_data.update(self._calc_sign(js_sign_func, video_id, video_info['ROOM']['point_id']))

        url_info = self._download_json(
            'https://v.douyu.com/api/stream/getStreamUrl', video_id,
            data=urlencode_postdata(form_data), note="Downloading video formats")

        formats = []
        for name, stream in traverse_obj(url_info, ('data', 'thumb_video', {dict.items}, ...)):
            video_url = traverse_obj(stream, ('url', {url_or_none}))
            if not video_url:
                self.to_screen(
                    f'"{self._FORMATS.get(name, name)}" format may require logging in. {self._login_hint()}')
                continue

            ext = determine_ext(video_url)
            is_hls = ext == 'm3u8'
            fmt = {
                'format': self._FORMATS.get(name),
                'format_id': name,
                'url': video_url,
                'quality': self._QUALITIES.get(name),
                'ext': 'mp4' if is_hls else ext,
                'protocol': 'm3u8_native' if is_hls else 'https',
            }
            fmt.update(parse_resolution(self._RESOLUTIONS.get(name)))
            formats.append(fmt)

        info = {
            'id': video_id,
            'formats': formats,
        }
        info.update(traverse_obj(video_info, ('DATA', {
            'title': ('content', 'title', {str}),
            'uploader': ('content', 'author', {str}),
            'uploader_id': ('content', 'up_id', {str_or_none}),
            'duration': ('content', 'video_duration', {int_or_none}),
            'thumbnail': ('content', 'video_pic', {url_or_none}),
            'timestamp': ('content', 'create_time', {int_or_none}),
            'view_count': ('content', 'view_num', {int_or_none}),
            'tags': ('videoTag', ..., 'tagName', {str}),
        })))
        return info