]>
Commit | Line | Data |
---|---|---|
1 | import time | |
2 | import hashlib | |
3 | import urllib | |
4 | import uuid | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from .openload import PhantomJSwrapper | |
8 | from ..utils import ( | |
9 | ExtractorError, | |
10 | UserNotLive, | |
11 | determine_ext, | |
12 | int_or_none, | |
13 | js_to_json, | |
14 | parse_resolution, | |
15 | str_or_none, | |
16 | traverse_obj, | |
17 | unescapeHTML, | |
18 | url_or_none, | |
19 | urlencode_postdata, | |
20 | urljoin, | |
21 | ) | |
22 | ||
23 | ||
class DouyuBaseIE(InfoExtractor):
    """Common helpers for the Douyu extractors.

    Provides access to the CryptoJS MD5 script and runs the site's
    ``ub98484234`` signing function through PhantomJS.
    """

    def _download_cryptojs_md5(self, video_id):
        """Download the CryptoJS MD5 rollup, trying each mirror in order.

        The first successfully downloaded script is written to the
        'douyu' cache section under 'crypto-js-md5' and returned.

        Raises:
            ExtractorError: if no mirror yields a script.
        """
        mirrors = (
            'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
            'https://cdn.bootcdn.net/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
        )
        for mirror in mirrors:
            # fatal=False so that a failing mirror falls through to the next one
            script = self._download_webpage(
                mirror, video_id, note='Downloading signing dependency', fatal=False)
            if not script:
                continue
            self.cache.store('douyu', 'crypto-js-md5', script)
            return script
        raise ExtractorError('Unable to download JS dependency (crypto-js/md5)')

    def _get_cryptojs_md5(self, video_id):
        """Return the CryptoJS MD5 script from the cache, downloading it on a miss."""
        cached = self.cache.load('douyu', 'crypto-js-md5')
        if cached:
            return cached
        return self._download_cryptojs_md5(video_id)

    def _calc_sign(self, sign_func, video_id, a):
        """Run the signing function in PhantomJS and return its output as a dict.

        ``sign_func`` is JS source expected to define ``ub98484234``; it is
        called with ``a``, a random hex id and the current Unix time.
        The printed query string is parsed, keeping the first value of
        every key.
        """
        nonce = uuid.uuid4().hex
        timestamp = round(time.time())
        md5_lib = self._get_cryptojs_md5(video_id)
        js_script = f'{md5_lib};{sign_func};console.log(ub98484234("{a}","{nonce}","{timestamp}"))'
        phantom = PhantomJSwrapper(self)
        output = phantom.execute(
            js_script, video_id, note='Executing JS signing script')
        signed_query = urllib.parse.parse_qs(output.strip())
        return {key: values[0] for key, values in signed_query.items()}

    def _search_js_sign_func(self, webpage, fatal=True):
        """Return the content of a <script> tag in ``webpage`` that mentions ``ub98484234``."""
        # The optional greedy `<script.*` prefix consumes earlier script tags,
        # so the last possible script tag is the one that gets captured
        sign_func_re = r'(?:<script.*)?<script[^>]*>(.*?ub98484234.*?)</script>'
        return self._search_regex(sign_func_re, webpage, 'JS sign func', fatal=fatal)
53 | ||
54 | ||
class DouyuTVIE(DouyuBaseIE):
    """Extractor for Douyu live rooms (douyu.com / douyutv.com)."""

    IE_DESC = '斗鱼直播'
    _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(topic/\w+\?rid=|(?:[^/]+/))*(?P<id>[A-Za-z0-9]+)'
    _TESTS = [{
        'url': 'https://www.douyu.com/pigff',
        'info_dict': {
            'id': '24422',
            'display_id': 'pigff',
            'ext': 'mp4',
            'title': 're:^【PIGFF】.* [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': r'≥15级牌子看鱼吧置顶帖进粉丝vx群',
            'thumbnail': str,
            'uploader': 'pigff',
            'is_live': True,
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.douyutv.com/85982',
        'info_dict': {
            'id': '85982',
            'display_id': '85982',
            'ext': 'flv',
            'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
            'thumbnail': r're:^https?://.*\.png',
            'uploader': 'douyu小漠',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Room not found',
    }, {
        'url': 'http://www.douyutv.com/17732',
        'info_dict': {
            'id': '17732',
            'display_id': '17732',
            'ext': 'flv',
            'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': r're:.*m7show@163\.com.*',
            'thumbnail': r're:^https?://.*\.png',
            'uploader': '7师傅',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.douyu.com/topic/ydxc?rid=6560603',
        'info_dict': {
            'id': '6560603',
            'display_id': '6560603',
            'ext': 'flv',
            'title': 're:^阿余:新年快乐恭喜发财! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': 're:.*直播时间.*',
            'thumbnail': r're:^https?://.*\.png',
            'uploader': '阿涛皎月Carry',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.douyu.com/xiaocang',
        'only_matching': True,
    }, {
        # \"room_id\"
        'url': 'http://www.douyu.com/t/lpl',
        'only_matching': True,
    }]

    def _get_sign_func(self, room_id, video_id):
        """Fetch the JS signing script for ``room_id`` from the homeH5Enc endpoint."""
        response = self._download_json(
            f'https://www.douyu.com/swf_api/homeH5Enc?rids={room_id}', video_id,
            note='Getting signing script')
        return response['data'][f'room{room_id}']

    def _extract_stream_formats(self, stream_formats):
        """Build the formats list from a list of getH5Play responses.

        Entries whose 'data' does not yield a stream URL are skipped.
        """
        formats = []
        for stream_info in traverse_obj(stream_formats, (..., 'data')):
            stream_url = urljoin(
                traverse_obj(stream_info, 'rtmp_url'), traverse_obj(stream_info, 'rtmp_live'))
            if not stream_url:
                continue

            rate_id = traverse_obj(stream_info, ('rate', {int_or_none}))
            # Look up the 'multirates' entry describing the rate of this stream
            rate_info = traverse_obj(
                stream_info, ('multirates', lambda _, v: v['rate'] == rate_id), get_all=False)
            ext = determine_ext(stream_url)
            is_hls = ext == 'm3u8'
            formats.append({
                'url': stream_url,
                'format_id': str_or_none(rate_id),
                'ext': 'mp4' if is_hls else ext,
                'protocol': 'm3u8_native' if is_hls else 'https',
                # Modulo with a negative divisor: rate 0 maps to 0, any positive
                # rate to a negative value that grows with the rate
                'quality': None if rate_id is None else rate_id % -10000,
                **traverse_obj(rate_info, {
                    'format': ('name', {str_or_none}),
                    'tbr': ('bit', {int_or_none}),
                }),
            })
        return formats

    def _real_extract(self, url):
        """Extract the live stream of a Douyu room.

        Raises:
            UserNotLive: if the page or the room API reports that the
                channel is offline or is looping VODs.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        room_id = self._search_regex(r'\$ROOM\.room_id\s*=\s*(\d+)', webpage, 'room id')

        video_loop = self._search_regex(r'"videoLoop"\s*:\s*(\d+)', webpage, 'loop', default='')
        if video_loop == '1':
            raise UserNotLive('The channel is auto-playing VODs', video_id=video_id)
        page_status = self._search_regex(
            r'\$ROOM\.show_status\s*=\s*(\d+)', webpage, 'status', default='')
        if page_status == '2':
            raise UserNotLive(video_id=video_id)

        # Grab metadata from API
        params = {
            'aid': 'wp',
            'client_sys': 'wp',
            'time': int(time.time()),
        }
        # 'auth' is the MD5 of the request path, the query and a fixed suffix
        auth_source = f'room/{room_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'
        params['auth'] = hashlib.md5(auth_source.encode()).hexdigest()
        room_response = self._download_json(
            f'http://www.douyutv.com/api/v1/room/{room_id}', video_id,
            note='Downloading room info', query=params, fatal=False)
        room = traverse_obj(room_response, 'data')

        # 1 = live, 2 = offline
        if traverse_obj(room, 'show_status') == '2':
            raise UserNotLive(video_id=video_id)

        # Prefer the signing script embedded in the page, else ask the API for it
        js_sign_func = self._search_js_sign_func(webpage, fatal=False) or self._get_sign_func(room_id, video_id)
        form_data = {
            'rate': 0,
            **self._calc_sign(js_sign_func, video_id, room_id),
        }

        def download_play_info(note):
            # Reads form_data at call time, so the current 'rate' is what gets posted
            return self._download_json(
                f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
                video_id, note=note,
                data=urlencode_postdata(form_data))

        first_response = download_play_info('Downloading livestream format')
        stream_formats = [first_response]

        # The first response already covers one rate; request every other listed rate
        fetched_rate = traverse_obj(first_response, ('data', 'rate'))
        for rate_id in traverse_obj(first_response, ('data', 'multirates', ..., 'rate')):
            if rate_id == fetched_rate:
                continue
            form_data['rate'] = rate_id
            stream_formats.append(download_play_info(f'Downloading livestream format {rate_id}'))

        return {
            'id': room_id,
            'formats': self._extract_stream_formats(stream_formats),
            'is_live': True,
            **traverse_obj(room, {
                # Drop the first character of the room's 'url' value
                'display_id': ('url', {str}, {lambda i: i[1:]}),
                'title': ('room_name', {unescapeHTML}),
                'description': ('show_details', {str}),
                'uploader': ('nickname', {str}),
                'thumbnail': ('room_src', {url_or_none}),
            })
        }
213 | ||
214 | ||
class DouyuShowIE(DouyuBaseIE):
    """Extractor for Douyu videos (v.douyu.com/show/... and vmobile.douyu.com/show/...)."""

    _VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)'

    _TESTS = [{
        'url': 'https://v.douyu.com/show/mPyq7oVNe5Yv1gLY',
        'info_dict': {
            'id': 'mPyq7oVNe5Yv1gLY',
            'ext': 'mp4',
            'title': '四川人小时候的味道“蒜苗回锅肉”,传统菜不能丢,要常做来吃',
            'duration': 633,
            'thumbnail': str,
            'uploader': '美食作家王刚V',
            'uploader_id': 'OVAO4NVx1m7Q',
            'timestamp': 1661850002,
            'upload_date': '20220830',
            'view_count': int,
            'tags': ['美食', '美食综合'],
        },
    }, {
        'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw',
        'only_matching': True,
    }]

    # Display name used for each stream key of the API response
    _FORMATS = {
        'super': '原画',
        'high': '超清',
        'normal': '高清',
    }

    # Relative preference of each stream key
    _QUALITIES = {
        'super': -1,
        'high': -2,
        'normal': -3,
    }

    # Resolution reported for each stream key
    _RESOLUTIONS = {
        'super': '1920x1080',
        'high': '1280x720',
        'normal': '852x480',
    }

    def _real_extract(self, url):
        """Extract formats and metadata of a Douyu video page."""
        # The mobile host is rewritten so that the v.douyu.com page is downloaded
        url = url.replace('vmobile.', 'v.')
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        video_info = self._search_json(
            r'<script>\s*window\.\$DATA\s*=', webpage,
            'video info', video_id, transform_source=js_to_json)

        js_sign_func = self._search_js_sign_func(webpage)
        point_id = video_info['ROOM']['point_id']
        form_data = {
            'vid': video_id,
            **self._calc_sign(js_sign_func, video_id, point_id),
        }
        url_info = self._download_json(
            'https://v.douyu.com/api/stream/getStreamUrl', video_id,
            data=urlencode_postdata(form_data), note="Downloading video formats")

        formats = []
        for name, stream in traverse_obj(url_info, ('data', 'thumb_video', {dict.items}, ...)):
            video_url = traverse_obj(stream, ('url', {url_or_none}))
            if not video_url:
                # An entry without a usable URL is only reported, never added
                self.to_screen(
                    f'"{self._FORMATS.get(name, name)}" format may require logging in. {self._login_hint()}')
                continue

            ext = determine_ext(video_url)
            is_hls = ext == 'm3u8'
            formats.append({
                'format': self._FORMATS.get(name),
                'format_id': name,
                'url': video_url,
                'quality': self._QUALITIES.get(name),
                'ext': 'mp4' if is_hls else ext,
                'protocol': 'm3u8_native' if is_hls else 'https',
                **parse_resolution(self._RESOLUTIONS.get(name))
            })

        metadata = traverse_obj(video_info, ('DATA', {
            'title': ('content', 'title', {str}),
            'uploader': ('content', 'author', {str}),
            'uploader_id': ('content', 'up_id', {str_or_none}),
            'duration': ('content', 'video_duration', {int_or_none}),
            'thumbnail': ('content', 'video_pic', {url_or_none}),
            'timestamp': ('content', 'create_time', {int_or_none}),
            'view_count': ('content', 'view_num', {int_or_none}),
            'tags': ('videoTag', ..., 'tagName', {str}),
        }))
        return {
            'id': video_id,
            'formats': formats,
            **metadata
        }