]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/fc2.py
[cleanup, docs] Misc cleanup
[yt-dlp.git] / yt_dlp / extractor / fc2.py
CommitLineData
cf0efe96 1# coding: utf-8
8e71456a
PH
2from __future__ import unicode_literals
3
15dfb392
LNO
4import re
5
8e71456a 6from .common import InfoExtractor
1cc79574 7from ..compat import (
cf0efe96 8 compat_parse_qs,
8e71456a 9)
1cc79574
PH
10from ..utils import (
11 ExtractorError,
15dfb392
LNO
12 WebSocketsWrapper,
13 has_websockets,
14 js_to_json,
5c2266df 15 sanitized_Request,
15dfb392 16 std_headers,
d6bc443b 17 traverse_obj,
15dfb392 18 update_url_query,
6e6bc8da 19 urlencode_postdata,
d6bc443b 20 urljoin,
1cc79574 21)
8e71456a
PH
22
23
class FC2IE(InfoExtractor):
    """Extractor for FC2 video pages and for internal ``fc2:<id>`` references."""

    _VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
    IE_NAME = 'fc2'
    _NETRC_MACHINE = 'fc2'
    _TESTS = [{
        'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
        'md5': 'a6ebe8ebe0396518689d963774a54eb7',
        'info_dict': {
            'id': '20121103kUan1KHs',
            'ext': 'flv',
            'title': 'Boxing again with Puff',
        },
    }, {
        'url': 'http://video.fc2.com/en/content/20150125cEva0hDn/',
        'info_dict': {
            'id': '20150125cEva0hDn',
            'ext': 'mp4',
        },
        'params': {
            'username': 'ytdl@yt-dl.org',
            'password': '(snip)',
        },
        'skip': 'requires actual password',
    }, {
        'url': 'http://video.fc2.com/en/a/content/20130926eZpARwsF',
        'only_matching': True,
    }]

    def _login(self):
        """Submit the login form with the configured credentials.

        Returns False when either credential is missing or when the login
        response lacks the success marker (a warning is reported in that
        case); returns True once the follow-up redirect has been requested.
        """
        username, password = self._get_login_info()
        if username is None or password is None:
            return False

        # Fields of the login form
        payload = urlencode_postdata({
            'email': username,
            'password': password,
            'done': 'video',
            'Submit': ' Login ',
        })
        login_request = sanitized_Request(
            'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', payload)
        response = self._download_webpage(
            login_request, None, note='Logging in', errnote='Unable to log in')

        if 'mode=redirect&login=done' in response:
            # The redirect target has to be requested as well
            redirect_request = sanitized_Request('http://id.fc2.com/?mode=redirect&login=done')
            self._download_webpage(
                redirect_request, None, note='Login redirect', errnote='Login redirect failed')
            return True

        self.report_warning('unable to log in: bad username or password')
        return False

    def _real_extract(self, url):
        """Build the info dict for *url*.

        For ``fc2:<id>`` URLs no webpage is fetched, so title, thumbnail and
        description stay None. Raises ExtractorError when the playlist API
        response carries no ``playlist.nq`` entry.
        """
        video_id = self._match_id(url)
        self._login()

        webpage = None
        if not url.startswith('fc2:'):
            webpage = self._download_webpage(url, video_id)
            # Session cookies must be cleared, then the login repeated
            self._downloader.cookiejar.clear_session_cookies()
            self._login()

        page_meta = {'title': None, 'thumbnail': None, 'description': None}
        if webpage is not None:
            title_patterns = (
                r'<h2\s+class="videoCnt_title">([^<]+?)</h2>',
                r'\s+href="[^"]+"\s*title="([^"]+?)"\s*rel="nofollow">\s*<img',
                # the webpage contains two matches for this one
                r'\s+href="[^"]+"\s*title="([^"]+?)"\s*rel="nofollow">\s*\1',
            )
            page_meta['title'] = self._html_search_regex(
                title_patterns, webpage, 'title', fatal=False)
            page_meta['thumbnail'] = self._og_search_thumbnail(webpage)
            page_meta['description'] = self._og_search_description(webpage, default=None)

        playlist_info = self._download_json(
            'https://video.fc2.com/api/v3/videoplaylist/%s?sh=1&fs=0' % video_id, video_id,
            note='Downloading info page')
        stream_path = traverse_obj(playlist_info, ('playlist', 'nq'))
        if not stream_path:
            raise ExtractorError('Unable to extract video URL')

        return {
            'id': video_id,
            'title': page_meta['title'],
            # The API value is resolved against the site root
            'url': urljoin('https://video.fc2.com/', stream_path),
            'ext': 'mp4',
            'protocol': 'm3u8_native',
            'description': page_meta['description'],
            'thumbnail': page_meta['thumbnail'],
        }
cf0efe96
YCH
119
120
class FC2EmbedIE(InfoExtractor):
    """Extractor for ``flv2.swf`` embed URLs; delegates to FC2IE via an ``fc2:<id>`` URL."""

    _VALID_URL = r'https?://video\.fc2\.com/flv2\.swf\?(?P<query>.+)'
    IE_NAME = 'fc2:embed'

    _TEST = {
        'url': 'http://video.fc2.com/flv2.swf?t=201404182936758512407645&i=20130316kwishtfitaknmcgd76kjd864hso93htfjcnaogz629mcgfs6rbfk0hsycma7shkf85937cbchfygd74&i=201403223kCqB3Ez&d=2625&sj=11&lang=ja&rel=1&from=11&cmt=1&tk=TlRBM09EQTNNekU9&tl=プリズン・ブレイク%20S1-01%20マイケル%20【吹替】',
        'md5': 'b8aae5334cb691bdb1193a88a6ab5d5a',
        'info_dict': {
            'id': '201403223kCqB3Ez',
            'ext': 'flv',
            'title': 'プリズン・ブレイク S1-01 マイケル 【吹替】',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        """Turn the embed query string into a ``url_transparent`` result.

        The video id is the last ``i`` parameter, the title is the first
        ``tl`` parameter (or a generic fallback), and a thumbnail URL is
        built only when an ``sj`` parameter is present.
        """
        params = compat_parse_qs(self._match_valid_url(url).group('query'))

        # The last "i" value is the one used as the video id
        video_id = params['i'][-1]
        fallback_titles = ['FC2 video %s' % video_id]
        title = params.get('tl', fallback_titles)[0]

        server = params.get('sj', [None])[0]
        if server:
            # See thumbnailImagePath() in ServerConst.as of flv2.swf
            path_parts = (video_id[:6], video_id[6:8], video_id[-2], video_id[-1], video_id)
            thumbnail = 'http://video%s-thumbnail.fc2.com/up/pic/%s.jpg' % (
                server, '/'.join(path_parts))
        else:
            thumbnail = None

        return {
            '_type': 'url_transparent',
            'ie_key': FC2IE.ie_key(),
            'url': 'fc2:%s' % video_id,
            'title': title,
            'thumbnail': thumbnail,
        }
15dfb392
LNO
157
158
class FC2LiveIE(InfoExtractor):
    """Extractor for FC2 live streams (``https://live.fc2.com/<numeric id>``).

    The HLS playlist URLs are requested over a WebSocket control connection.
    That connection is left open and attached to every returned format under
    the ``ws`` key, together with the ``fc2_live`` protocol name.
    """

    _VALID_URL = r'https?://live\.fc2\.com/(?P<id>\d+)'
    IE_NAME = 'fc2:live'

    _TESTS = [{
        'url': 'https://live.fc2.com/57892267/',
        'info_dict': {
            'id': '57892267',
            'title': 'どこまで・・・',
            'uploader': 'あつあげ',
            'uploader_id': '57892267',
            'thumbnail': r're:https?://.+fc2.+',
        },
        'skip': 'livestream',
    }]

    def _recv_json_message(self, ws, video_id):
        """Block until *ws* yields a message that parses to a non-empty JSON object.

        Empty messages, unparsable messages and non-dict payloads are skipped.
        Returns a ``(raw_message, parsed_dict)`` tuple.
        """
        while True:
            recv = ws.recv()
            if not recv:
                continue
            data = self._parse_json(recv, video_id, fatal=False)
            if data and isinstance(data, dict):
                return recv, data

    def _request_hls_information(self, ws, video_id):
        """Wait for ``connect_complete``, send ``get_hls_information`` and return its response."""
        while True:
            _, data = self._recv_json_message(ws, video_id)
            if data.get('name') == 'connect_complete':
                break
        ws.send(r'{"name":"get_hls_information","arguments":{},"id":1}')

        while True:
            recv, data = self._recv_json_message(ws, video_id)
            # The request above was sent with id 1; only the matching response is of interest
            if data.get('name') == '_response_' and data.get('id') == 1:
                self.write_debug('Goodbye.')
                return data
            # Any other message is only shown (truncated) as debug output
            self.write_debug('Server said: %s%s' % (recv[:100], '...' if len(recv) > 100 else ''))

    def _real_extract(self, url):
        """Build the info dict for a live channel.

        Raises ExtractorError when the websockets library is unavailable or
        when the WebSocket response carries no usable ``arguments`` object.
        """
        if not has_websockets:
            raise ExtractorError('websockets library is not available. Please install it.', expected=True)
        video_id = self._match_id(url)
        webpage = self._download_webpage('https://live.fc2.com/%s/' % video_id, video_id)

        self._set_cookie('live.fc2.com', 'js-player_size', '1')

        member_api = self._download_json(
            'https://live.fc2.com/api/memberApi.php', video_id, data=urlencode_postdata({
                'channel': '1',
                'profile': '1',
                'user': '1',
                'streamid': video_id
            }), note='Requesting member info')

        control_server = self._download_json(
            'https://live.fc2.com/api/getControlServer.php', video_id, note='Downloading ControlServer data',
            data=urlencode_postdata({
                'channel_id': video_id,
                'mode': 'play',
                'orz': '',
                'channel_version': member_api['data']['channel_data']['version'],
                'client_version': '2.1.0\n [1]',
                'client_type': 'pc',
                'client_app': 'browser_hls',
                'ipv6': '',
            }), headers={'X-Requested-With': 'XMLHttpRequest'})
        self._set_cookie('live.fc2.com', 'l_ortkn', control_server['orz_raw'])

        ws_url = update_url_query(control_server['url'], {'control_token': control_server['control_token']})

        self.to_screen('%s: Fetching HLS playlist info via WebSocket' % video_id)
        ws = WebSocketsWrapper(ws_url, {
            # NOTE(review): the slice drops the first 12 characters of the stringified
            # cookies, presumably a leading "Set-Cookie: " - confirm what this yields
            # when more than one cookie is set for the domain
            'Cookie': str(self._get_cookies('https://live.fc2.com/'))[12:],
            'Origin': 'https://live.fc2.com',
            'Accept': '*/*',
            'User-Agent': std_headers['User-Agent'],
        })
        # Entered by hand instead of via "with": the connection must outlive this
        # method because it is handed over through the formats below.
        # NOTE(review): nothing closes ws if extraction fails after this point - confirm
        # whether WebSocketsWrapper cleans up on its own
        ws.__enter__()

        # write_debug adds the "[debug]" tag itself, so it is not repeated in the message
        self.write_debug('Sending HLS server request')
        playlist_data = self._request_hls_information(ws, video_id)

        arguments = playlist_data.get('arguments')
        if not isinstance(arguments, dict):
            raise ExtractorError('Unable to fetch HLS playlist info via WebSocket')

        formats = []
        for name, playlists in arguments.items():
            if not isinstance(playlists, list):
                continue
            for pl in playlists:
                # An entry without a URL is skipped rather than failing the "in" test
                playlist_url = pl.get('url') or ''
                if pl.get('status') == 0 and 'master_playlist' in playlist_url:
                    formats.extend(self._extract_m3u8_formats(
                        playlist_url, video_id, ext='mp4', m3u8_id=name, live=True,
                        headers={
                            'Origin': 'https://live.fc2.com',
                            'Referer': url,
                        }))

        self._sort_formats(formats)
        for fmt in formats:
            fmt.update({
                'protocol': 'fc2_live',
                'ws': ws,
            })

        title = self._html_search_meta(('og:title', 'twitter:title'), webpage, 'live title', fatal=False)
        if not title:
            title = self._html_extract_title(webpage, 'html title', fatal=False)
            if title:
                # remove service name in <title>
                title = re.sub(r'\s+-\s+.+$', '', title)
        uploader = None
        if title:
            # A title of the form "<title> [<uploader>]" is split into its two parts
            match = self._search_regex(r'^(.+?)\s*\[(.+?)\]$', title, 'title and uploader', default=None, group=(1, 2))
            if match and all(match):
                title, uploader = match

        live_info_view = self._search_regex(r'(?s)liveInfoView\s*:\s*({.+?}),\s*premiumStateView', webpage, 'user info', fatal=False) or None
        if live_info_view:
            # remove jQuery code from object literal
            live_info_view = re.sub(r'\$\(.+?\)[^,]+,', '"",', live_info_view)
            live_info_view = self._parse_json(js_to_json(live_info_view), video_id)

        return {
            'id': video_id,
            'title': title or traverse_obj(live_info_view, 'title'),
            'description': self._html_search_meta(
                ('og:description', 'twitter:description'),
                webpage, 'live description', fatal=False) or traverse_obj(live_info_view, 'info'),
            'formats': formats,
            'uploader': uploader or traverse_obj(live_info_view, 'name'),
            'uploader_id': video_id,
            'thumbnail': traverse_obj(live_info_view, 'thumb'),
            'is_live': True,
        }