]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/mildom.py
[mildom:vod] Remove proxy
[yt-dlp.git] / yt_dlp / extractor / mildom.py
CommitLineData
ffcb8191
THD
1# coding: utf-8
2from __future__ import unicode_literals
3
2cff4959 4import base64
ffcb8191
THD
5from datetime import datetime
6import itertools
7import json
5d39972e 8import re
2cff4959 9import random
ffcb8191
THD
10
11from .common import InfoExtractor
12from ..utils import (
13 ExtractorError, std_headers,
14 update_url_query,
15 random_uuidv4,
16 try_get,
17)
18from ..compat import (
19 compat_urlparse,
20 compat_urllib_parse_urlencode,
21 compat_str,
22)
23
24
class MildomBaseIE(InfoExtractor):
    """Common plumbing for the Mildom extractors.

    Provides the API call wrapper, the query parameters that are attached to
    every API request, the dispatcher configuration lookup and the guest ID
    lookup. The guest ID and the dispatcher configuration are cached on the
    instance after the first successful lookup.
    """

    _GUEST_ID = None
    _DISPATCHER_CONFIG = None

    # Proxies provided by @nao20010128nao
    # See https://github.com/nao20010128nao/bookish-octo-barnacle
    _MILDOM_PROXY_HOSTS = (
        # 'bookish-octo-barnacle.vercel.app',  # see https://github.com/yt-dlp/yt-dlp/issues/251
        'free-mountain-goal.glitch.me',
        'lesmih0sted.f5.si',
    )

    def _mildom_proxy_host(self):
        """Return one of ``_MILDOM_PROXY_HOSTS``, chosen at random."""
        return random.choice(self._MILDOM_PROXY_HOSTS)

    def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', init=False):
        """Download a JSON API response and return its ``'body'`` member.

        ``query`` is merged over the common query parameters (see
        ``_common_queries``); ``init`` is forwarded to ``_common_queries``.
        """
        url = update_url_query(url, self._common_queries(query, init=init))
        return self._download_json(url, video_id, note=note)['body']

    def _common_queries(self, query=None, init=False):
        """Build the query parameters sent with every API request.

        Entries of ``query`` (if any) override the common ones. With
        ``init=True`` the guest ID is sent empty instead of being looked up;
        ``guest_id`` relies on this so that fetching the guest ID does not
        recurse into itself.
        """
        dc = self._fetch_dispatcher_config()
        r = {
            'timestamp': self.iso_timestamp(),
            '__guest_id': '' if init else self.guest_id(),
            '__location': dc['location'],
            '__country': dc['country'],
            '__cluster': dc['cluster'],
            '__platform': 'web',
            '__la': self.lang_code(),
            '__pcv': 'v2.9.44',
            'sfr': 'pc',
            'accessToken': '',
        }
        # ``None`` default instead of a shared mutable ``{}``
        r.update(query or {})
        return r

    def _fetch_dispatcher_config(self):
        """Return the dispatcher configuration, fetching it on first use.

        The primary endpoint takes a JSON body whose ``data`` member is
        base64-encoded JSON and answers with base64-encoded JSON in ``data``.
        If that request raises ``ExtractorError``, the configuration is
        downloaded from one of the proxy hosts instead.
        """
        if not self._DISPATCHER_CONFIG:
            try:
                tmp = self._download_json(
                    'https://disp.mildom.com/serverListV2', 'initialization',
                    note='Downloading dispatcher_config', data=json.dumps({
                        'protover': 0,
                        'data': base64.b64encode(json.dumps({
                            'fr': 'web',
                            'sfr': 'pc',
                            'devi': 'Windows',
                            'la': 'ja',
                            'gid': None,
                            'loc': '',
                            'clu': '',
                            'wh': '1919*810',
                            'rtm': self.iso_timestamp(),
                            'ua': std_headers['User-Agent'],
                        }).encode('utf8')).decode('utf8').replace('\n', ''),
                    }).encode('utf8'))
                self._DISPATCHER_CONFIG = self._parse_json(base64.b64decode(tmp['data']), 'initialization')
            except ExtractorError:
                self._DISPATCHER_CONFIG = self._download_json(
                    'https://%s/api/mildom/dispatcher_config' % self._mildom_proxy_host(), 'initialization',
                    note='Downloading dispatcher_config fallback')
        return self._DISPATCHER_CONFIG

    @staticmethod
    def iso_timestamp():
        """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SS.mmmZ``.

        Counterpart of JavaScript's ``new Date().toISOString()``.
        """
        now = datetime.utcnow()
        # Format the milliseconds explicitly: isoformat() drops the fractional
        # part entirely when microsecond == 0, so slicing its output would cut
        # into the seconds field.
        return '%s.%03dZ' % (now.strftime('%Y-%m-%dT%H:%M:%S'), now.microsecond // 1000)

    def guest_id(self):
        """Return the guest ID, looking it up on first use.

        Tries, in order: the ``h5init`` API endpoint, then the ``gid`` cookie
        for www.mildom.com, then the ``gid`` cookie for m.mildom.com. Returns
        an empty string when none of them yields a string value.
        Counterpart of the site's ``getGuestId``.
        """
        if self._GUEST_ID:
            return self._GUEST_ID
        self._GUEST_ID = try_get(
            self, (
                # init=True: send an empty guest ID rather than calling guest_id() again
                lambda x: x._call_api(
                    'https://cloudac.mildom.com/nonolive/gappserv/guest/h5init', 'initialization',
                    note='Downloading guest token', init=True)['guest_id'] or None,
                lambda x: x._get_cookies('https://www.mildom.com').get('gid').value,
                lambda x: x._get_cookies('https://m.mildom.com').get('gid').value,
            ), compat_str) or ''
        return self._GUEST_ID

    def lang_code(self):
        """Return the language code sent to the API (always ``'ja'``).

        Counterpart of the site's ``getCurrentLangCode``.
        """
        return 'ja'
110
111
class MildomIE(MildomBaseIE):
    """Extractor for a user's ongoing live stream on Mildom."""

    IE_NAME = 'mildom'
    IE_DESC = 'Record ongoing live by specific user in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/(?P<id>\d+)'

    def _real_extract(self, url):
        """Return the info dict for the live stream of the user in ``url``.

        The HLS formats reported by the stream server are rewritten so that
        each one is fetched through a randomly chosen proxy host.
        """
        video_id = self._match_id(url)
        # Always fetch the canonical www page, whatever host the input URL used
        webpage = self._download_webpage('https://www.mildom.com/%s' % video_id, video_id)

        enterstudio = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/live/enterstudio', video_id,
            note='Downloading live metadata', query={'user_id': video_id})
        # Fall back to the user ID when the API reports no 'log_id'
        result_video_id = enterstudio.get('log_id', video_id)

        # For title and uploader the page's meta tags are tried first,
        # the API fields second
        title_getters = (
            lambda _: self._html_search_meta('twitter:description', webpage),
            lambda data: data['anchor_intro'],
        )
        title = try_get(enterstudio, title_getters, compat_str)

        description_getters = (
            lambda data: data['intro'],
            lambda data: data['live_intro'],
        )
        description = try_get(enterstudio, description_getters, compat_str)

        uploader_getters = (
            lambda _: self._html_search_meta('twitter:title', webpage),
            lambda data: data['loginname'],
        )
        uploader = try_get(enterstudio, uploader_getters, compat_str)

        servers = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/live/liveserver', result_video_id,
            note='Downloading live server list', query={
                'user_id': video_id,
                'live_server_type': 'hls',
            })

        stream_query = self._common_queries({
            'streamReqId': random_uuidv4(),
            'is_lhls': '0',
        })
        master_url = '%s/%s_master.m3u8' % (servers['stream_server'], video_id)
        formats = self._extract_m3u8_formats(
            update_url_query(master_url, stream_query), result_video_id, 'mp4',
            headers={
                'Referer': 'https://www.mildom.com/',
                'Origin': 'https://www.mildom.com',
            }, note='Downloading m3u8 information')

        # The per-request parameters are left out of the proxied format URLs
        del stream_query['streamReqId'], stream_query['timestamp']
        proxied_query = compat_urllib_parse_urlencode(stream_query, True)
        for fmt in formats:
            original = compat_urlparse.urlparse(fmt['url'])
            # A proxy host is picked independently for every format
            fmt['url'] = compat_urlparse.urlunparse(original._replace(
                netloc=self._mildom_proxy_host(),
                query=proxied_query,
                path='/api/mildom' + original.path))

        self._sort_formats(formats)

        return {
            'id': result_video_id,
            'title': title,
            'description': description,
            'uploader': uploader,
            'uploader_id': video_id,
            'formats': formats,
            'is_live': True,
        }
181
182
class MildomVodIE(MildomBaseIE):
    """Extractor for a single Mildom VOD (playback) page."""

    IE_NAME = 'mildom:vod'
    IE_DESC = 'Download a VOD in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P<user_id>\d+)/(?P<id>(?P=user_id)-[a-zA-Z0-9]+)'

    def _real_extract(self, url):
        """Return the info dict for the VOD in ``url``.

        Produces one audio-only format from ``audio_url`` plus one video
        format per entry of ``video_link`` in the playback metadata.
        """
        m = re.match(self._VALID_URL, url)
        user_id, video_id = m.group('user_id'), m.group('id')
        # Always fetch the canonical www page, whatever host the input URL used
        url = 'https://www.mildom.com/playback/%s/%s' % (user_id, video_id)

        webpage = self._download_webpage(url, video_id)

        autoplay = self._call_api(
            'https://cloudac.mildom.com/nonolive/videocontent/playback/getPlaybackDetail', video_id,
            note='Downloading playback metadata', query={
                'v_id': video_id,
            })['playback']

        # The page's meta tag is tried first, the API field second
        title = try_get(
            autoplay, (
                lambda x: self._html_search_meta('og:description', webpage),
                lambda x: x['title'],
            ), compat_str)
        description = try_get(
            autoplay, (
                lambda x: x['video_intro'],
            ), compat_str)
        uploader = try_get(
            autoplay, (
                lambda x: x['author_info']['login_name'],
            ), compat_str)

        formats = [{
            'url': autoplay['audio_url'],
            'format_id': 'audio',
            'protocol': 'm3u8_native',
            'vcodec': 'none',
            'acodec': 'aac',
            'ext': 'm4a'
        }]

        # Source dimensions are only used to derive each rendition's width
        # from its height ('level'); treat them as optional so a missing or
        # zero value does not abort the whole extraction.
        video_width = autoplay.get('video_width')
        video_height = autoplay.get('video_height')
        for fmt in autoplay['video_link']:
            height = fmt['level']
            if video_width and video_height:
                width = height * video_width // video_height
            else:
                width = None
            formats.append({
                'format_id': 'video-%s' % fmt['name'],
                'url': fmt['url'],
                'protocol': 'm3u8_native',
                'width': width,
                'height': height,
                'vcodec': 'h264',
                'acodec': 'aac',
                'ext': 'mp4'
            })

        # Proxy is not needed for VODs: the format URLs are used as returned
        # by the API, without rewriting them to a proxy host.

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'uploader': uploader,
            'uploader_id': user_id,
            'formats': formats,
        }
260
261
class MildomUserVodIE(MildomBaseIE):
    """Playlist extractor listing every VOD of a Mildom user profile."""

    IE_NAME = 'mildom:user:vod'
    IE_DESC = 'Download all VODs from specific user in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/profile/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.mildom.com/profile/10093333',
        'info_dict': {
            'id': '10093333',
            'title': 'Uploads from ねこばたけ',
        },
        'playlist_mincount': 351,
    }]

    def _real_extract(self, url):
        """Return a playlist of the user's VODs, each delegated to MildomVodIE.

        The playback list is requested page by page (30 items per request)
        until a page comes back empty.
        """
        user_id = self._match_id(url)

        self.report_warning('To download ongoing live, please use "https://www.mildom.com/%s" instead. This will list up VODs belonging to user.' % user_id)

        profile = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/user/profileV2', user_id,
            query={'user_id': user_id}, note='Downloading user profile')['user_info']

        vod_urls = []
        for page_num in itertools.count(1):
            page_items = self._call_api(
                'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList',
                user_id, note='Downloading page %d' % page_num, query={
                    'user_id': user_id,
                    'page': page_num,
                    'limit': '30',
                })
            # An empty page marks the end of the listing
            if not page_items:
                break
            for item in page_items:
                vod_urls.append('https://www.mildom.com/playback/%s/%s' % (user_id, item['v_id']))

        entries = [self.url_result(vod_url, ie=MildomVodIE.ie_key()) for vod_url in vod_urls]
        playlist_title = 'Uploads from %s' % profile['loginname']
        return self.playlist_result(entries, user_id, playlist_title)