jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/douyutv.py
[misc] Add `hatch`, `ruff`, `pre-commit` and improve dev docs (#7409)
[yt-dlp.git] / yt_dlp / extractor / douyutv.py
1 import hashlib
2 import time
3 import urllib
4 import uuid
5
6 from .common import InfoExtractor
7 from .openload import PhantomJSwrapper
8 from ..utils import (
9 ExtractorError,
10 UserNotLive,
11 determine_ext,
12 int_or_none,
13 js_to_json,
14 parse_resolution,
15 str_or_none,
16 traverse_obj,
17 unescapeHTML,
18 url_or_none,
19 urlencode_postdata,
20 urljoin,
21 )
22
23
class DouyuBaseIE(InfoExtractor):
    # Shared helpers for the Douyu extractors: obtaining the crypto-js MD5
    # bundle, running the site's JS signing function and locating that
    # function inside a downloaded page.

    def _download_cryptojs_md5(self, video_id):
        """Download the crypto-js MD5 bundle, cache it and return its source.

        The mirrors are tried in order with non-fatal downloads; the first
        non-empty response is stored in the 'douyu' cache section under the
        'crypto-js-md5' key. Raises ExtractorError when every mirror fails.
        """
        mirrors = (
            'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
            'https://cdn.bootcdn.net/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
        )
        for mirror in mirrors:
            source = self._download_webpage(
                mirror, video_id, note='Downloading signing dependency', fatal=False)
            if not source:
                # This mirror yielded nothing; fall through to the next one
                continue
            self.cache.store('douyu', 'crypto-js-md5', source)
            return source
        raise ExtractorError('Unable to download JS dependency (crypto-js/md5)')

    def _get_cryptojs_md5(self, video_id):
        """Return the crypto-js MD5 source, preferring the cached copy."""
        cached = self.cache.load('douyu', 'crypto-js-md5')
        if cached:
            return cached
        return self._download_cryptojs_md5(video_id)

    def _calc_sign(self, sign_func, video_id, a):
        """Run the JS signing function and return its output as a flat dict.

        sign_func is JS source that is expected to define `ub98484234`; it is
        called with `a`, a fresh random hex id and the current Unix time
        (rounded to whole seconds). The script is executed through
        PhantomJSwrapper and whatever it prints is parsed as a query string,
        keeping only the first value of each key.
        """
        # The nonce and timestamp are taken before the dependency is loaded,
        # which may involve a download
        nonce = uuid.uuid4().hex
        timestamp = round(time.time())
        md5_lib = self._get_cryptojs_md5(video_id)
        js_script = f'{md5_lib};{sign_func};console.log(ub98484234("{a}","{nonce}","{timestamp}"))'
        output = PhantomJSwrapper(self).execute(
            js_script, video_id, note='Executing JS signing script')
        parsed = urllib.parse.parse_qs(output.strip())
        return {key: values[0] for key, values in parsed.items()}

    def _search_js_sign_func(self, webpage, fatal=True):
        """Return the contents of the <script> element mentioning `ub98484234`."""
        # The optional greedy `<script.*` prefix is there so that the last
        # possible script tag is the one that gets matched
        pattern = r'(?:<script.*)?<script[^>]*>(.*?ub98484234.*?)</script>'
        return self._search_regex(pattern, webpage, 'JS sign func', fatal=fatal)
53
54
class DouyuTVIE(DouyuBaseIE):
    # Extractor for live rooms on douyu.com / douyutv.com
    IE_DESC = '斗鱼直播'
    _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(topic/\w+\?rid=|(?:[^/]+/))*(?P<id>[A-Za-z0-9]+)'
    _TESTS = [{
        'url': 'https://www.douyu.com/pigff',
        'info_dict': {
            'id': '24422',
            'display_id': 'pigff',
            'ext': 'mp4',
            'title': 're:^【PIGFF】.* [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': r'≥15级牌子看鱼吧置顶帖进粉丝vx群',
            'thumbnail': str,
            'uploader': 'pigff',
            'is_live': True,
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.douyutv.com/85982',
        'info_dict': {
            'id': '85982',
            'display_id': '85982',
            'ext': 'flv',
            'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
            'thumbnail': r're:^https?://.*\.png',
            'uploader': 'douyu小漠',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Room not found',
    }, {
        'url': 'http://www.douyutv.com/17732',
        'info_dict': {
            'id': '17732',
            'display_id': '17732',
            'ext': 'flv',
            'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': r're:.*m7show@163\.com.*',
            'thumbnail': r're:^https?://.*\.png',
            'uploader': '7师傅',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.douyu.com/topic/ydxc?rid=6560603',
        'info_dict': {
            'id': '6560603',
            'display_id': '6560603',
            'ext': 'flv',
            'title': 're:^阿余:新年快乐恭喜发财! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': 're:.*直播时间.*',
            'thumbnail': r're:^https?://.*\.png',
            'uploader': '阿涛皎月Carry',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.douyu.com/xiaocang',
        'only_matching': True,
    }, {
        # \"room_id\"
        'url': 'http://www.douyu.com/t/lpl',
        'only_matching': True,
    }]

    def _get_sign_func(self, room_id, video_id):
        """Fetch the JS signing script for room_id from the homeH5Enc API."""
        response = self._download_json(
            f'https://www.douyu.com/swf_api/homeH5Enc?rids={room_id}', video_id,
            note='Getting signing script')
        return response['data'][f'room{room_id}']

    def _extract_stream_formats(self, stream_formats):
        """Build the formats list from a list of getH5Play API responses.

        Responses whose 'data' does not yield a stream URL are skipped.
        """
        formats = []
        for stream_info in traverse_obj(stream_formats, (..., 'data')):
            stream_url = urljoin(
                traverse_obj(stream_info, 'rtmp_url'), traverse_obj(stream_info, 'rtmp_live'))
            if not stream_url:
                continue

            rate_id = traverse_obj(stream_info, ('rate', {int_or_none}))
            # Entry of 'multirates' that describes the rate actually served
            rate_info = traverse_obj(
                stream_info, ('multirates', lambda _, v: v['rate'] == rate_id), get_all=False)
            ext = determine_ext(stream_url)
            is_hls = ext == 'm3u8'
            # With a negative modulus, rate 0 stays 0 and positive rates
            # become negative numbers
            quality = None if rate_id is None else rate_id % -10000
            formats.append({
                'url': stream_url,
                'format_id': str_or_none(rate_id),
                'ext': 'mp4' if is_hls else ext,
                'protocol': 'm3u8_native' if is_hls else 'https',
                'quality': quality,
                **traverse_obj(rate_info, {
                    'format': ('name', {str_or_none}),
                    'tbr': ('bit', {int_or_none}),
                }),
            })
        return formats

    def _real_extract(self, url):
        """Extract the live stream info dict for the room at url.

        Raises UserNotLive when the page or the room API reports that the
        channel is looping VODs or is offline.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        room_id = self._search_regex(r'\$ROOM\.room_id\s*=\s*(\d+)', webpage, 'room id')

        video_loop = self._search_regex(r'"videoLoop"\s*:\s*(\d+)', webpage, 'loop', default='')
        if video_loop == '1':
            raise UserNotLive('The channel is auto-playing VODs', video_id=video_id)
        page_status = self._search_regex(
            r'\$ROOM\.show_status\s*=\s*(\d+)', webpage, 'status', default='')
        if page_status == '2':
            raise UserNotLive(video_id=video_id)

        # Grab metadata from API
        params = {
            'aid': 'wp',
            'client_sys': 'wp',
            'time': int(time.time()),
        }
        # The auth digest covers the query string as it is before 'auth' is added
        unsigned_query = urllib.parse.urlencode(params)
        params['auth'] = hashlib.md5(
            f'room/{room_id}?{unsigned_query}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
        room_response = self._download_json(
            f'http://www.douyutv.com/api/v1/room/{room_id}', video_id,
            note='Downloading room info', query=params, fatal=False)
        room = traverse_obj(room_response, 'data')

        # 1 = live, 2 = offline
        if traverse_obj(room, 'show_status') == '2':
            raise UserNotLive(video_id=video_id)

        # Prefer the signing function embedded in the page; otherwise ask the API
        js_sign_func = self._search_js_sign_func(webpage, fatal=False) or self._get_sign_func(room_id, video_id)
        form_data = {
            'rate': 0,
            **self._calc_sign(js_sign_func, video_id, room_id),
        }
        play_api = f'https://www.douyu.com/lapi/live/getH5Play/{room_id}'

        def request_streams(note):
            # Posts form_data as it stands at the time of the call
            return self._download_json(
                play_api, video_id, note=note, data=urlencode_postdata(form_data))

        first_response = request_streams("Downloading livestream format")
        stream_formats = [first_response]

        # Request every other advertised rate; the first response already
        # covers the rate it reports
        served_rate = traverse_obj(first_response, ('data', 'rate'))
        for rate_id in traverse_obj(first_response, ('data', 'multirates', ..., 'rate')):
            if rate_id == served_rate:
                continue
            form_data['rate'] = rate_id
            stream_formats.append(request_streams(f'Downloading livestream format {rate_id}'))

        formats = self._extract_stream_formats(stream_formats)
        metadata = traverse_obj(room, {
            # Drops the first character of the room's 'url' value
            'display_id': ('url', {str}, {lambda i: i[1:]}),
            'title': ('room_name', {unescapeHTML}),
            'description': ('show_details', {str}),
            'uploader': ('nickname', {str}),
            'thumbnail': ('room_src', {url_or_none}),
        })
        return {
            'id': room_id,
            'formats': formats,
            'is_live': True,
            **metadata,
        }
213
214
class DouyuShowIE(DouyuBaseIE):
    # Extractor for videos under v.douyu.com/show/ (and the vmobile. host)
    _VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)'

    _TESTS = [{
        'url': 'https://v.douyu.com/show/mPyq7oVNe5Yv1gLY',
        'info_dict': {
            'id': 'mPyq7oVNe5Yv1gLY',
            'ext': 'mp4',
            'title': '四川人小时候的味道“蒜苗回锅肉”,传统菜不能丢,要常做来吃',
            'duration': 633,
            'thumbnail': str,
            'uploader': '美食作家王刚V',
            'uploader_id': 'OVAO4NVx1m7Q',
            'timestamp': 1661850002,
            'upload_date': '20220830',
            'view_count': int,
            'tags': ['美食', '美食综合'],
        },
    }, {
        'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw',
        'only_matching': True,
    }]

    # Display name reported as 'format' for each stream key
    _FORMATS = {
        'super': '原画',
        'high': '超清',
        'normal': '高清',
    }

    # Relative 'quality' value assigned to each stream key
    _QUALITIES = {
        'super': -1,
        'high': -2,
        'normal': -3,
    }

    # Resolution string fed to parse_resolution for each stream key
    _RESOLUTIONS = {
        'super': '1920x1080',
        'high': '1280x720',
        'normal': '852x480',
    }

    def _real_extract(self, url):
        """Extract formats and metadata for the video at url."""
        # Mobile URLs are rewritten to the v. host before anything is fetched
        url = url.replace('vmobile.', 'v.')
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        video_info = self._search_json(
            r'<script>\s*window\.\$DATA\s*=', webpage,
            'video info', video_id, transform_source=js_to_json)

        js_sign_func = self._search_js_sign_func(webpage)
        point_id = video_info['ROOM']['point_id']
        form_data = {
            'vid': video_id,
            **self._calc_sign(js_sign_func, video_id, point_id),
        }
        url_info = self._download_json(
            'https://v.douyu.com/api/stream/getStreamUrl', video_id,
            data=urlencode_postdata(form_data), note="Downloading video formats")

        formats = []
        streams = traverse_obj(url_info, ('data', 'thumb_video', {dict.items}, ...))
        for name, stream in streams:
            video_url = traverse_obj(stream, ('url', {url_or_none}))
            if not video_url:
                # No usable URL was returned for this stream key
                self.to_screen(
                    f'"{self._FORMATS.get(name, name)}" format may require logging in. {self._login_hint()}')
                continue

            ext = determine_ext(video_url)
            is_hls = ext == 'm3u8'
            formats.append({
                'format': self._FORMATS.get(name),
                'format_id': name,
                'url': video_url,
                'quality': self._QUALITIES.get(name),
                'ext': 'mp4' if is_hls else ext,
                'protocol': 'm3u8_native' if is_hls else 'https',
                **parse_resolution(self._RESOLUTIONS.get(name))
            })

        metadata = traverse_obj(video_info, ('DATA', {
            'title': ('content', 'title', {str}),
            'uploader': ('content', 'author', {str}),
            'uploader_id': ('content', 'up_id', {str_or_none}),
            'duration': ('content', 'video_duration', {int_or_none}),
            'thumbnail': ('content', 'video_pic', {url_or_none}),
            'timestamp': ('content', 'create_time', {int_or_none}),
            'view_count': ('content', 'view_num', {int_or_none}),
            'tags': ('videoTag', ..., 'tagName', {str}),
        }))
        return {
            'id': video_id,
            'formats': formats,
            **metadata,
        }