]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/mildom.py
Fix inconsistent use of `report_warning`
[yt-dlp.git] / yt_dlp / extractor / mildom.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from datetime import datetime
5 import itertools
6 import json
7 import base64
8 import re
9
10 from .common import InfoExtractor
11 from ..utils import (
12 ExtractorError, std_headers,
13 update_url_query,
14 random_uuidv4,
15 try_get,
16 )
17 from ..compat import (
18 compat_urlparse,
19 compat_urllib_parse_urlencode,
20 compat_str,
21 )
22
23
class MildomBaseIE(InfoExtractor):
    """Base class for the Mildom extractors.

    Bundles the API call wrapper, the query parameters attached to every
    request, the dispatcher configuration lookup and the guest ID lookup.
    """

    # Both values are filled in lazily on first use (see guest_id() and
    # _fetch_dispatcher_config()) and then reused.
    _GUEST_ID = None
    _DISPATCHER_CONFIG = None

    def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', init=False):
        """Download JSON from `url` and return its 'body' member.

        `query` is merged over the common query parameters before the
        request is made; `init` is forwarded to _common_queries().
        """
        url = update_url_query(url, self._common_queries(query, init=init))
        return self._download_json(url, video_id, note=note)['body']

    def _common_queries(self, query=None, init=False):
        """Return the common query parameters, updated with `query`.

        With init=True the guest ID is sent as an empty string instead of
        being looked up.
        """
        dc = self._fetch_dispatcher_config()
        r = {
            'timestamp': self.iso_timestamp(),
            # guest_id() itself goes through _call_api(init=True); looking
            # the ID up here in that case would recurse
            '__guest_id': '' if init else self.guest_id(),
            '__location': dc['location'],
            '__country': dc['country'],
            '__cluster': dc['cluster'],
            '__platform': 'web',
            '__la': self.lang_code(),
            '__pcv': 'v2.9.44',
            'sfr': 'pc',
            'accessToken': '',
        }
        if query:
            r.update(query)
        return r

    def _fetch_dispatcher_config(self):
        """Return the dispatcher configuration, downloading it on first use.

        The official endpoint is tried first; if that raises ExtractorError
        the configuration is downloaded from the fallback URL instead.
        """
        if not self._DISPATCHER_CONFIG:
            try:
                # The request body is JSON whose 'data' member is itself
                # base64-encoded JSON
                tmp = self._download_json(
                    'https://disp.mildom.com/serverListV2', 'initialization',
                    note='Downloading dispatcher_config', data=json.dumps({
                        'protover': 0,
                        'data': base64.b64encode(json.dumps({
                            'fr': 'web',
                            'sfr': 'pc',
                            'devi': 'Windows',
                            'la': 'ja',
                            'gid': None,
                            'loc': '',
                            'clu': '',
                            'wh': '1919*810',
                            'rtm': self.iso_timestamp(),
                            'ua': std_headers['User-Agent'],
                        }).encode('utf8')).decode('utf8').replace('\n', ''),
                    }).encode('utf8'))
                # The response's 'data' member is base64-encoded JSON as well
                self._DISPATCHER_CONFIG = self._parse_json(base64.b64decode(tmp['data']), 'initialization')
            except ExtractorError:
                self._DISPATCHER_CONFIG = self._download_json(
                    'https://bookish-octo-barnacle.vercel.app/api/mildom/dispatcher_config', 'initialization',
                    note='Downloading dispatcher_config fallback')
        return self._DISPATCHER_CONFIG

    @staticmethod
    def iso_timestamp():
        """Return the current UTC time formatted like JavaScript's
        `new Date().toISOString()`, e.g. '2021-04-01T12:34:56.789Z'."""
        now = datetime.utcnow()
        # Format the milliseconds explicitly: isoformat() drops the fractional
        # part entirely when microsecond == 0, so slicing its output would cut
        # into the seconds field.
        return '%s.%03dZ' % (now.strftime('%Y-%m-%dT%H:%M:%S'), now.microsecond // 1000)

    def guest_id(self):
        """Return the guest ID (getGuestId), looking it up on first use.

        Candidate sources, in the order they are listed: the h5init API
        response, then the 'gid' cookie of www.mildom.com, then the 'gid'
        cookie of m.mildom.com.  Falls back to an empty string.
        """
        if self._GUEST_ID:
            return self._GUEST_ID
        self._GUEST_ID = try_get(
            self, (
                lambda x: x._call_api(
                    'https://cloudac.mildom.com/nonolive/gappserv/guest/h5init', 'initialization',
                    note='Downloading guest token', init=True)['guest_id'] or None,
                lambda x: x._get_cookies('https://www.mildom.com').get('gid').value,
                lambda x: x._get_cookies('https://m.mildom.com').get('gid').value,
            ), compat_str) or ''
        return self._GUEST_ID

    def lang_code(self):
        """Return the language code sent to the API (getCurrentLangCode)."""
        return 'ja'
98
99
class MildomIE(MildomBaseIE):
    """Extractor for the ongoing live stream of a Mildom user."""

    IE_NAME = 'mildom'
    IE_DESC = 'Record ongoing live by specific user in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/(?P<id>\d+)'

    def _real_extract(self, url):
        """Return the info dict for the live stream of the user in `url`."""
        video_id = self._match_id(url)
        # Always fetch the desktop page, whichever host the input URL used
        url = 'https://www.mildom.com/%s' % video_id

        webpage = self._download_webpage(url, video_id)

        enterstudio = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/live/enterstudio', video_id,
            note='Downloading live metadata', query={'user_id': video_id})
        # Use the broadcast's log_id as the ID when present, else the user ID
        result_video_id = enterstudio.get('log_id', video_id)

        title_getters = (
            lambda _: self._html_search_meta('twitter:description', webpage),
            lambda meta: meta['anchor_intro'],
        )
        title = try_get(enterstudio, title_getters, compat_str)

        description_getters = (
            lambda meta: meta['intro'],
            lambda meta: meta['live_intro'],
        )
        description = try_get(enterstudio, description_getters, compat_str)

        uploader_getters = (
            lambda _: self._html_search_meta('twitter:title', webpage),
            lambda meta: meta['loginname'],
        )
        uploader = try_get(enterstudio, uploader_getters, compat_str)

        servers = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/live/liveserver', result_video_id,
            note='Downloading live server list', query={
                'user_id': video_id,
                'live_server_type': 'hls',
            })

        stream_query = self._common_queries({
            'streamReqId': random_uuidv4(),
            'is_lhls': '0',
        })
        m3u8_url = update_url_query(servers['stream_server'] + '/%s_master.m3u8' % video_id, stream_query)
        request_headers = {
            'Referer': 'https://www.mildom.com/',
            'Origin': 'https://www.mildom.com',
        }
        formats = self._extract_m3u8_formats(
            m3u8_url, result_video_id, 'mp4',
            headers=request_headers, note='Downloading m3u8 information')

        # These two parameters are sent with the master playlist request only;
        # they are dropped from the rewritten format URLs
        for key in ('streamReqId', 'timestamp'):
            del stream_query[key]
        proxy_query = compat_urllib_parse_urlencode(stream_query, True)

        # Uses https://github.com/nao20010128nao/bookish-octo-barnacle by @nao20010128nao as a proxy
        for fmt in formats:
            parts = compat_urlparse.urlparse(fmt['url'])
            proxied = parts._replace(
                netloc='bookish-octo-barnacle.vercel.app',
                query=proxy_query,
                path='/api/mildom' + parts.path)
            fmt['url'] = compat_urlparse.urlunparse(proxied)

        self._sort_formats(formats)

        return {
            'id': result_video_id,
            'title': title,
            'description': description,
            'uploader': uploader,
            'uploader_id': video_id,
            'formats': formats,
            'is_live': True,
        }
169
170
class MildomVodIE(MildomBaseIE):
    """Extractor for a single Mildom VOD (playback page)."""

    IE_NAME = 'mildom:vod'
    IE_DESC = 'Download a VOD in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P<user_id>\d+)/(?P<id>(?P=user_id)-[a-zA-Z0-9]+)'

    def _real_extract(self, url):
        """Return the info dict for the VOD referenced by `url`."""
        mobj = re.match(self._VALID_URL, url)
        user_id = mobj.group('user_id')
        video_id = mobj.group('id')
        # Always fetch the desktop page, whichever host the input URL used
        url = 'https://www.mildom.com/playback/%s/%s' % (user_id, video_id)

        webpage = self._download_webpage(url, video_id)

        playback_detail = self._call_api(
            'https://cloudac.mildom.com/nonolive/videocontent/playback/getPlaybackDetail', video_id,
            note='Downloading playback metadata', query={
                'v_id': video_id,
            })
        autoplay = playback_detail['playback']

        title_getters = (
            lambda _: self._html_search_meta('og:description', webpage),
            lambda meta: meta['title'],
        )
        title = try_get(autoplay, title_getters, compat_str)
        description = try_get(
            autoplay, (lambda meta: meta['video_intro'], ), compat_str)
        uploader = try_get(
            autoplay, (lambda meta: meta['author_info']['login_name'], ), compat_str)

        # One audio-only format, followed by one video format per entry of
        # 'video_link'
        formats = [{
            'url': autoplay['audio_url'],
            'format_id': 'audio',
            'protocol': 'm3u8_native',
            'vcodec': 'none',
            'acodec': 'aac',
        }]
        for link in autoplay['video_link']:
            formats.append({
                'format_id': 'video-%s' % link['name'],
                'url': link['url'],
                'protocol': 'm3u8_native',
                # Width is computed from the level (used as the height) and
                # the width/height ratio reported for the video
                'width': link['level'] * autoplay['video_width'] // autoplay['video_height'],
                'height': link['level'],
                'vcodec': 'h264',
                'acodec': 'aac',
            })

        stream_query = self._common_queries({
            'is_lhls': '0',
        })
        del stream_query['timestamp']

        for fmt in formats:
            fmt['ext'] = 'mp4'
            parts = compat_urlparse.urlparse(fmt['url'])
            # The original path, minus its first five characters, is handed to
            # the proxy endpoint as the 'path' query parameter
            stream_query['path'] = parts.path[5:]
            proxied = parts._replace(
                netloc='bookish-octo-barnacle.vercel.app',
                query=compat_urllib_parse_urlencode(stream_query, True),
                path='/api/mildom/vod2/proxy')
            fmt['url'] = compat_urlparse.urlunparse(proxied)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'uploader': uploader,
            'uploader_id': user_id,
            'formats': formats,
        }
247
248
class MildomUserVodIE(MildomBaseIE):
    """Extractor that lists every VOD of a Mildom user as a playlist."""

    IE_NAME = 'mildom:user:vod'
    IE_DESC = 'Download all VODs from specific user in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/profile/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.mildom.com/profile/10093333',
        'info_dict': {
            'id': '10093333',
            'title': 'Uploads from ねこばたけ',
        },
        'playlist_mincount': 351,
    }]

    def _real_extract(self, url):
        """Return a playlist of the VODs belonging to the user in `url`."""
        user_id = self._match_id(url)

        self.report_warning('To download ongoing live, please use "https://www.mildom.com/%s" instead. This will list up VODs belonging to user.' % user_id)

        profile_response = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/user/profileV2', user_id,
            query={'user_id': user_id}, note='Downloading user profile')
        profile = profile_response['user_info']

        # Request page after page until one comes back empty
        vod_urls = []
        for page_num in itertools.count(1):
            page_items = self._call_api(
                'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList',
                user_id, note='Downloading page %d' % page_num, query={
                    'user_id': user_id,
                    'page': page_num,
                    'limit': '30',
                })
            if not page_items:
                break
            for item in page_items:
                vod_urls.append('https://www.mildom.com/playback/%s/%s' % (user_id, item['v_id']))

        entries = [
            self.url_result(vod_url, ie=MildomVodIE.ie_key())
            for vod_url in vod_urls]
        return self.playlist_result(entries, user_id, 'Uploads from %s' % profile['loginname'])