jfr.im git - yt-dlp.git/blob - youtube_dlc/extractor/mildom.py
Fix `--windows-filenames` removing `/` from UNIX paths
[yt-dlp.git] / youtube_dlc / extractor / mildom.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from datetime import datetime
5 import itertools
6 import json
7 import base64
8
9 from .common import InfoExtractor
10 from ..utils import (
11 ExtractorError, std_headers,
12 update_url_query,
13 random_uuidv4,
14 try_get,
15 )
16 from ..compat import (
17 compat_urlparse,
18 compat_urllib_parse_urlencode,
19 compat_str,
20 )
21
22
class MildomBaseIE(InfoExtractor):
    """Base class for the Mildom extractors.

    Provides the query parameters attached to every Mildom API request, a
    thin JSON API wrapper, and per-instance caches for the dispatcher
    configuration and the guest id.
    """

    # Cached guest id; filled in on the instance by guest_id().
    _GUEST_ID = None
    # Cached dispatcher configuration; filled in on the instance by
    # _fetch_dispatcher_config().
    _DISPATCHER_CONFIG = None

    def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', init=False):
        """Download a JSON API response and return its 'body' member.

        url      -- API endpoint; the common query parameters are appended to it
        video_id -- id passed through to _download_json
        query    -- optional dict of extra query parameters (they override
                    the common ones)
        note     -- progress note passed through to _download_json
        init     -- forwarded to _common_queries(); when true the guest id is
                    sent as an empty string instead of being looked up
        """
        url = update_url_query(url, self._common_queries(query, init=init))
        return self._download_json(url, video_id, note=note)['body']

    def _common_queries(self, query=None, init=False):
        """Return the query parameters shared by all Mildom requests.

        query -- optional dict merged on top of the defaults
        init  -- when true, '__guest_id' is '' rather than self.guest_id();
                 guest_id() itself passes init=True so that fetching the
                 guest id does not call back into guest_id()
        """
        dc = self._fetch_dispatcher_config()
        r = {
            'timestamp': self.iso_timestamp(),
            '__guest_id': '' if init else self.guest_id(),
            '__location': dc['location'],
            '__country': dc['country'],
            '__cluster': dc['cluster'],
            '__platform': 'web',
            '__la': self.lang_code(),
            '__pcv': 'v2.9.44',
            'sfr': 'pc',
            'accessToken': '',
        }
        # A None default (instead of a shared mutable {}) keeps the signature
        # safe; an omitted or empty query simply adds nothing.
        r.update(query or {})
        return r

    def _fetch_dispatcher_config(self):
        """Return the dispatcher configuration, downloading it on first use.

        The primary endpoint is sent a JSON payload whose 'data' member is
        base64-encoded JSON, and its reply's 'data' member is base64-decoded
        and parsed as JSON. If that raises ExtractorError, the configuration
        is downloaded from a fallback URL instead. The result is cached on
        the instance.
        """
        if not self._DISPATCHER_CONFIG:
            try:
                tmp = self._download_json(
                    'https://disp.mildom.com/serverListV2', 'initialization',
                    note='Downloading dispatcher_config', data=json.dumps({
                        'protover': 0,
                        'data': base64.b64encode(json.dumps({
                            'fr': 'web',
                            'sfr': 'pc',
                            'devi': 'Windows',
                            'la': 'ja',
                            'gid': None,
                            'loc': '',
                            'clu': '',
                            'wh': '1919*810',
                            'rtm': self.iso_timestamp(),
                            'ua': std_headers['User-Agent'],
                        }).encode('utf8')).decode('utf8').replace('\n', ''),
                    }).encode('utf8'))
                self._DISPATCHER_CONFIG = self._parse_json(base64.b64decode(tmp['data']), 'initialization')
            except ExtractorError:
                self._DISPATCHER_CONFIG = self._download_json(
                    'https://bookish-octo-barnacle.vercel.app/api/dispatcher_config', 'initialization',
                    note='Downloading dispatcher_config fallback')
        return self._DISPATCHER_CONFIG

    @staticmethod
    def iso_timestamp():
        """Return the current UTC time in the format of JavaScript's
        new Date().toISOString(), e.g. '2021-01-01T12:34:56.789Z'."""
        # isoformat() drops the fractional part when microsecond == 0, so
        # slicing its result would cut into the seconds field. strftime('%f')
        # always emits six digits, making the [:-3] truncation to
        # milliseconds safe.
        return datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z'

    def guest_id(self):
        """Return the guest id, looking it up and caching it when unset.

        The getters are handed to try_get in this order: the h5init API
        endpoint, the 'gid' cookie for www.mildom.com, and the 'gid' cookie
        for m.mildom.com. When try_get yields nothing the id becomes '',
        which is falsy, so the next call performs the lookup again.
        """
        if self._GUEST_ID:
            return self._GUEST_ID
        self._GUEST_ID = try_get(
            self, (
                # init=True sends an empty '__guest_id' so this request does
                # not re-enter guest_id().
                lambda x: x._call_api(
                    'https://cloudac.mildom.com/nonolive/gappserv/guest/h5init', 'initialization',
                    note='Downloading guest token', init=True)['guest_id'] or None,
                lambda x: x._get_cookies('https://www.mildom.com').get('gid').value,
                lambda x: x._get_cookies('https://m.mildom.com').get('gid').value,
            ), compat_str) or ''
        return self._GUEST_ID

    def lang_code(self):
        """Return the language code sent as '__la'; currently always 'ja'."""
        return 'ja'
97
98
class MildomIE(MildomBaseIE):
    """Extractor for the ongoing live stream of a Mildom user."""

    IE_NAME = 'mildom'
    IE_DESC = 'Record ongoing live by specific user in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/(?P<id>\d+)'

    def _real_extract(self, url):
        """Return the info dict for the live stream of the user id in *url*.

        Downloads the user's page, the 'enterstudio' metadata and the HLS
        server list, extracts the formats from the master playlist, then
        rewrites every format URL to go through the proxy host.
        """
        user_id = self._match_id(url)
        # The www page is always the one fetched, whichever host matched.
        page_url = 'https://www.mildom.com/%s' % user_id
        webpage = self._download_webpage(page_url, user_id)

        studio = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/live/enterstudio', user_id,
            note='Downloading live metadata', query={'user_id': user_id})

        def string_from(*getters):
            # Hands the getters to try_get in the given order, asking for a
            # compat_str result.
            return try_get(studio, getters, compat_str)

        title = string_from(
            lambda _: self._html_search_meta('twitter:description', webpage),
            lambda info: info['anchor_intro'])
        description = string_from(
            lambda info: info['intro'],
            lambda info: info['live_intro'])
        uploader = string_from(
            lambda _: self._html_search_meta('twitter:title', webpage),
            lambda info: info['loginname'])

        server_info = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/live/liveserver', user_id,
            note='Downloading live server list', query={
                'user_id': user_id,
                'live_server_type': 'hls',
            })

        proxy_query = self._common_queries({
            'streamReqId': random_uuidv4(),
            'is_lhls': '0',
        })
        master_url = update_url_query(
            server_info['stream_server'] + '/%s_master.m3u8' % user_id, proxy_query)
        request_headers = {
            'Referer': 'https://www.mildom.com/',
            'Origin': 'https://www.mildom.com',
        }
        formats = self._extract_m3u8_formats(
            master_url, user_id, 'mp4',
            headers=request_headers, note='Downloading m3u8 information')

        # These two parameters are used for the master playlist request only;
        # they are not forwarded on the rewritten format URLs.
        for dropped_key in ('streamReqId', 'timestamp'):
            del proxy_query[dropped_key]

        for stream in formats:
            # Uses https://github.com/nao20010128nao/bookish-octo-barnacle by @nao20010128nao as a proxy
            pieces = compat_urlparse.urlparse(stream['url'])
            stream['url'] = compat_urlparse.urlunparse(pieces._replace(
                netloc='bookish-octo-barnacle.vercel.app',
                query=compat_urllib_parse_urlencode(proxy_query, True),
                path='/api' + pieces.path))

        self._sort_formats(formats)

        return {
            'id': user_id,
            'title': title,
            'description': description,
            'uploader': uploader,
            'uploader_id': user_id,
            'formats': formats,
            'is_live': True,
        }
167
168
class MildomVodIE(MildomBaseIE):
    """Extractor for a single Mildom VOD (playback) page."""

    IE_NAME = 'mildom:vod'
    IE_DESC = 'Download a VOD in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P<user_id>\d+)/(?P<id>(?P=user_id)-[a-zA-Z0-9]+)'

    def _real_extract(self, url):
        """Return the info dict for the VOD identified by *url*.

        Downloads the playback page and the playback detail metadata, builds
        one audio-only format plus one format per 'video_link' entry, then
        rewrites every format URL to go through the proxy host.
        """
        vod_id = self._match_id(url)
        uploader_id = self._VALID_URL_RE.match(url).group('user_id')
        # The www page is always the one fetched, whichever host matched.
        page_url = 'https://www.mildom.com/playback/%s/%s' % (uploader_id, vod_id)

        webpage = self._download_webpage(page_url, vod_id)

        playback = self._call_api(
            'https://cloudac.mildom.com/nonolive/videocontent/playback/getPlaybackDetail', vod_id,
            note='Downloading playback metadata', query={
                'v_id': vod_id,
            })['playback']

        def string_from(*getters):
            # Hands the getters to try_get in the given order, asking for a
            # compat_str result.
            return try_get(playback, getters, compat_str)

        title = string_from(
            lambda _: self._html_search_meta('og:description', webpage),
            lambda info: info['title'])
        description = string_from(lambda info: info['video_intro'])
        uploader = string_from(lambda info: info['author_info']['login_name'])

        audio_only = {
            'url': playback['audio_url'],
            'format_id': 'audio',
            'protocol': 'm3u8_native',
            'vcodec': 'none',
            'acodec': 'aac',
        }
        # 'level' is used as the height; the width is scaled from it with the
        # aspect ratio of the reported video_width / video_height.
        with_video = [{
            'format_id': 'video-%s' % link['name'],
            'url': link['url'],
            'protocol': 'm3u8_native',
            'width': link['level'] * playback['video_width'] // playback['video_height'],
            'height': link['level'],
            'vcodec': 'h264',
            'acodec': 'aac',
        } for link in playback['video_link']]

        proxy_query = self._common_queries({
            'is_lhls': '0',
        })
        del proxy_query['timestamp']

        formats = [audio_only] + with_video
        for entry in formats:
            entry['ext'] = 'mp4'
            pieces = compat_urlparse.urlparse(entry['url'])
            # The original path, minus its first five characters, is passed
            # to the proxy as the 'path' parameter.
            # NOTE(review): presumably this strips a fixed prefix such as
            # '/vod2' - confirm against real playback URLs.
            proxy_query['path'] = pieces.path[5:]
            entry['url'] = compat_urlparse.urlunparse(pieces._replace(
                netloc='bookish-octo-barnacle.vercel.app',
                query=compat_urllib_parse_urlencode(proxy_query, True),
                path='/api/vod2/proxy'))

        self._sort_formats(formats)

        return {
            'id': vod_id,
            'title': title,
            'description': description,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'formats': formats,
        }
246
247
class MildomUserVodIE(MildomBaseIE):
    """Extractor that lists all VODs of a Mildom user as a playlist."""

    IE_NAME = 'mildom:user:vod'
    IE_DESC = 'Download all VODs from specific user in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/profile/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.mildom.com/profile/10093333',
        'info_dict': {
            'id': '10093333',
            'title': 'Uploads from ねこばたけ',
        },
        'playlist_mincount': 351,
    }]

    def _real_extract(self, url):
        """Return a playlist of every VOD of the user id in *url*.

        Warns that the profile URL only lists VODs, downloads the user
        profile, then requests the playback list page by page (30 per
        request) until a page comes back empty.
        """
        user_id = self._match_id(url)

        self._downloader.report_warning('To download ongoing live, please use "https://www.mildom.com/%s" instead. This will list up VODs belonging to user.' % user_id)

        user_info = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/user/profileV2', user_id,
            query={'user_id': user_id}, note='Downloading user profile')['user_info']

        entries = []
        for page_number in itertools.count(1):
            playbacks = self._call_api(
                'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList',
                user_id, note='Downloading page %d' % page_number, query={
                    'user_id': user_id,
                    'page': page_number,
                    'limit': '30',
                })
            # An empty reply marks the end of the listing.
            if not playbacks:
                break
            entries.extend(
                self.url_result(
                    'https://www.mildom.com/playback/%s/%s' % (user_id, item['v_id']),
                    ie=MildomVodIE.ie_key())
                for item in playbacks)

        playlist_title = 'Uploads from %s' % user_info['loginname']
        return self.playlist_result(entries, user_id, playlist_title)