]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/fc2.py
[cleanup] Misc fixes and cleanup
[yt-dlp.git] / yt_dlp / extractor / fc2.py
CommitLineData
15dfb392
LNO
1import re
2
8e71456a 3from .common import InfoExtractor
8a82af35 4from ..compat import compat_parse_qs
9b8ee23b 5from ..dependencies import websockets
1cc79574
PH
6from ..utils import (
7 ExtractorError,
15dfb392 8 WebSocketsWrapper,
15dfb392 9 js_to_json,
5c2266df 10 sanitized_Request,
d6bc443b 11 traverse_obj,
15dfb392 12 update_url_query,
6e6bc8da 13 urlencode_postdata,
d6bc443b 14 urljoin,
1cc79574 15)
8e71456a
PH
16
17
class FC2IE(InfoExtractor):
    # Handles video.fc2.com content pages as well as the internal
    # "fc2:<id>" pseudo-URL form accepted by _VALID_URL.
    _VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
    IE_NAME = 'fc2'
    _NETRC_MACHINE = 'fc2'
    _TESTS = [{
        'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
        'md5': 'a6ebe8ebe0396518689d963774a54eb7',
        'info_dict': {
            'id': '20121103kUan1KHs',
            'ext': 'flv',
            'title': 'Boxing again with Puff',
        },
    }, {
        'url': 'http://video.fc2.com/en/content/20150125cEva0hDn/',
        'info_dict': {
            'id': '20150125cEva0hDn',
            'ext': 'mp4',
        },
        'params': {
            'username': 'ytdl@yt-dl.org',
            'password': '(snip)',
        },
        'skip': 'requires actual password',
    }, {
        'url': 'http://video.fc2.com/en/a/content/20130926eZpARwsF',
        'only_matching': True,
    }]

    def _login(self):
        """Log in with the configured credentials.

        Returns False when no credentials are available or when the login
        response does not contain the success marker; True otherwise.
        """
        username, password = self._get_login_info()
        if username is None or password is None:
            return False

        # Log in
        form_payload = urlencode_postdata({
            'email': username,
            'password': password,
            'done': 'video',
            'Submit': ' Login ',
        })
        login_response = self._download_webpage(
            sanitized_Request(
                'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', form_payload),
            None, note='Logging in', errnote='Unable to log in')
        if 'mode=redirect&login=done' not in login_response:
            self.report_warning('unable to log in: bad username or password')
            return False

        # this is also needed
        self._download_webpage(
            sanitized_Request('http://id.fc2.com/?mode=redirect&login=done'),
            None, note='Login redirect', errnote='Login redirect failed')
        return True

    def _real_extract(self, url):
        """Build the info dict for a single video.

        Title, thumbnail and description are scraped from the content page
        and therefore stay None for "fc2:<id>" URLs, which have no page to
        download. The media URL is read from the videoplaylist JSON API.

        Raises ExtractorError when that API response carries no video URL.
        """
        video_id = self._match_id(url)
        self._login()

        title = thumbnail = description = None
        if not url.startswith('fc2:'):
            webpage = self._download_webpage(url, video_id)
            # must clear
            self._downloader.cookiejar.clear_session_cookies()
            self._login()

            title_patterns = (
                r'<h2\s+class="videoCnt_title">([^<]+?)</h2>',
                r'\s+href="[^"]+"\s*title="([^"]+?)"\s*rel="nofollow">\s*<img',
                # there's two matches in the webpage
                r'\s+href="[^"]+"\s*title="([^"]+?)"\s*rel="nofollow">\s*\1',
            )
            title = self._html_search_regex(title_patterns, webpage, 'title', fatal=False)
            thumbnail = self._og_search_thumbnail(webpage)
            description = self._og_search_description(webpage, default=None)

        info_url = 'https://video.fc2.com/api/v3/videoplaylist/%s?sh=1&fs=0' % video_id
        playlist_info = self._download_json(info_url, video_id, note='Downloading info page')
        media_path = traverse_obj(playlist_info, ('playlist', 'nq'))
        if not media_path:
            raise ExtractorError('Unable to extract video URL')

        return {
            'id': video_id,
            'title': title,
            # the API value is resolved against the site root
            'url': urljoin('https://video.fc2.com/', media_path),
            'ext': 'mp4',
            'protocol': 'm3u8_native',
            'description': description,
            'thumbnail': thumbnail,
        }
cf0efe96
YCH
113
114
class FC2EmbedIE(InfoExtractor):
    # Handles flv2.swf embed URLs: the metadata is taken from the query
    # string and the actual extraction is delegated to FC2IE.
    _VALID_URL = r'https?://video\.fc2\.com/flv2\.swf\?(?P<query>.+)'
    IE_NAME = 'fc2:embed'

    _TEST = {
        'url': 'http://video.fc2.com/flv2.swf?t=201404182936758512407645&i=20130316kwishtfitaknmcgd76kjd864hso93htfjcnaogz629mcgfs6rbfk0hsycma7shkf85937cbchfygd74&i=201403223kCqB3Ez&d=2625&sj=11&lang=ja&rel=1&from=11&cmt=1&tk=TlRBM09EQTNNekU9&tl=プリズン・ブレイク%20S1-01%20マイケル%20【吹替】',
        'md5': 'b8aae5334cb691bdb1193a88a6ab5d5a',
        'info_dict': {
            'id': '201403223kCqB3Ez',
            'ext': 'flv',
            'title': 'プリズン・ブレイク S1-01 マイケル 【吹替】',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        """Turn an embed URL into a url_transparent result for FC2IE.

        The video ID is the last ``i`` query parameter, the title comes from
        ``tl`` (with a generic fallback) and the thumbnail URL is derived
        from ``sj`` when that parameter is present.

        Raises ExtractorError when the query string has no ``i`` parameter.
        """
        mobj = self._match_valid_url(url)
        query = compat_parse_qs(mobj.group('query'))

        # _VALID_URL accepts any query string, so ``i`` is not guaranteed;
        # fail with a proper extractor error instead of a bare KeyError.
        video_ids = query.get('i')
        if not video_ids:
            raise ExtractorError('Unable to find the video ID in the embed URL', expected=True)
        video_id = video_ids[-1]
        title = query.get('tl', ['FC2 video %s' % video_id])[0]

        sj = query.get('sj', [None])[0]
        thumbnail = None
        if sj:
            # See thumbnailImagePath() in ServerConst.as of flv2.swf
            thumbnail = 'http://video%s-thumbnail.fc2.com/up/pic/%s.jpg' % (
                sj, '/'.join((video_id[:6], video_id[6:8], video_id[-2], video_id[-1], video_id)))

        return {
            '_type': 'url_transparent',
            'ie_key': FC2IE.ie_key(),
            'url': 'fc2:%s' % video_id,
            'title': title,
            'thumbnail': thumbnail,
        }
15dfb392
LNO
151
152
class FC2LiveIE(InfoExtractor):
    # Handles live.fc2.com channels. The HLS playlist information is
    # requested over a WebSocket connection to the control server.
    _VALID_URL = r'https?://live\.fc2\.com/(?P<id>\d+)'
    IE_NAME = 'fc2:live'

    _TESTS = [{
        'url': 'https://live.fc2.com/57892267/',
        'info_dict': {
            'id': '57892267',
            'title': 'どこまで・・・',
            'uploader': 'あつあげ',
            'uploader_id': '57892267',
            'thumbnail': r're:https?://.+fc2.+',
        },
        'skip': 'livestream',
    }]

    def _real_extract(self, url):
        """Extract formats and metadata for a live channel.

        Steps: query the member and control-server APIs, open a WebSocket to
        the control server, wait for ``connect_complete``, request the HLS
        information and turn every usable master playlist into formats.
        Every format is tagged with the ``fc2_live`` protocol and carries the
        open WebSocket object under ``ws``.

        Raises ExtractorError when the websockets dependency is missing or
        when the WebSocket reply carries no usable playlist information.
        """
        if not websockets:
            raise ExtractorError('websockets library is not available. Please install it.', expected=True)
        video_id = self._match_id(url)
        webpage = self._download_webpage('https://live.fc2.com/%s/' % video_id, video_id)

        self._set_cookie('live.fc2.com', 'js-player_size', '1')

        member_api = self._download_json(
            'https://live.fc2.com/api/memberApi.php', video_id, data=urlencode_postdata({
                'channel': '1',
                'profile': '1',
                'user': '1',
                'streamid': video_id
            }), note='Requesting member info')

        control_server = self._download_json(
            'https://live.fc2.com/api/getControlServer.php', video_id, note='Downloading ControlServer data',
            data=urlencode_postdata({
                'channel_id': video_id,
                'mode': 'play',
                'orz': '',
                'channel_version': member_api['data']['channel_data']['version'],
                'client_version': '2.1.0\n [1]',
                'client_type': 'pc',
                'client_app': 'browser_hls',
                'ipv6': '',
            }), headers={'X-Requested-With': 'XMLHttpRequest'})
        self._set_cookie('live.fc2.com', 'l_ortkn', control_server['orz_raw'])

        ws_url = update_url_query(control_server['url'], {'control_token': control_server['control_token']})
        playlist_data = None

        self.to_screen('%s: Fetching HLS playlist info via WebSocket' % video_id)
        ws = WebSocketsWrapper(ws_url, {
            # NOTE(review): the [12:] slice presumably drops a leading
            # 'Set-Cookie: ' prefix from the stringified cookies; if several
            # cookies are present the string may span multiple lines - confirm.
            'Cookie': str(self._get_cookies('https://live.fc2.com/'))[12:],
            'Origin': 'https://live.fc2.com',
            'Accept': '*/*',
            'User-Agent': self.get_param('http_headers')['User-Agent'],
        })

        self.write_debug('Sending HLS server request')

        # Wait until the server announces that the connection is ready;
        # empty or non-JSON-object messages are ignored.
        while True:
            recv = ws.recv()
            if not recv:
                continue
            data = self._parse_json(recv, video_id, fatal=False)
            if not data or not isinstance(data, dict):
                continue

            if data.get('name') == 'connect_complete':
                break
        ws.send(r'{"name":"get_hls_information","arguments":{},"id":1}')

        # Read messages until the response matching our request id arrives
        while True:
            recv = ws.recv()
            if not recv:
                continue
            data = self._parse_json(recv, video_id, fatal=False)
            if not data or not isinstance(data, dict):
                continue
            if data.get('name') == '_response_' and data.get('id') == 1:
                self.write_debug('Goodbye')
                playlist_data = data
                break
            self.write_debug('Server said: %s%s' % (recv[:100], '...' if len(recv) > 100 else ''))

        # The loop above only exits with a non-empty dict, so validate the
        # part that is actually consumed below instead of the dict itself.
        arguments = playlist_data.get('arguments') if playlist_data else None
        if not isinstance(arguments, dict):
            raise ExtractorError('Unable to fetch HLS playlist info via WebSocket')

        formats = []
        for name, playlists in arguments.items():
            if not isinstance(playlists, list):
                continue
            for pl in playlists:
                if not isinstance(pl, dict):
                    continue
                playlist_url = pl.get('url')
                # Skip entries without a string URL rather than crashing on
                # the substring check.
                if not isinstance(playlist_url, str):
                    continue
                if pl.get('status') == 0 and 'master_playlist' in playlist_url:
                    formats.extend(self._extract_m3u8_formats(
                        playlist_url, video_id, ext='mp4', m3u8_id=name, live=True,
                        headers={
                            'Origin': 'https://live.fc2.com',
                            'Referer': url,
                        }))

        self._sort_formats(formats)
        for fmt in formats:
            fmt.update({
                'protocol': 'fc2_live',
                'ws': ws,
            })

        title = self._html_search_meta(('og:title', 'twitter:title'), webpage, 'live title', fatal=False)
        if not title:
            title = self._html_extract_title(webpage, 'html title', fatal=False)
            if title:
                # remove service name in <title>
                title = re.sub(r'\s+-\s+.+$', '', title)
        uploader = None
        if title:
            # Split a "<title> [<uploader>]" pattern when both parts are present
            match = self._search_regex(r'^(.+?)\s*\[(.+?)\]$', title, 'title and uploader', default=None, group=(1, 2))
            if match and all(match):
                title, uploader = match

        live_info_view = self._search_regex(r'(?s)liveInfoView\s*:\s*({.+?}),\s*premiumStateView', webpage, 'user info', fatal=False) or None
        if live_info_view:
            # remove jQuery code from object literal
            live_info_view = re.sub(r'\$\(.+?\)[^,]+,', '"",', live_info_view)
            # Only used for fallback metadata below, so a parse failure must
            # not abort the extraction.
            live_info_view = self._parse_json(js_to_json(live_info_view), video_id, fatal=False)

        return {
            'id': video_id,
            'title': title or traverse_obj(live_info_view, 'title'),
            'description': self._html_search_meta(
                ('og:description', 'twitter:description'),
                webpage, 'live description', fatal=False) or traverse_obj(live_info_view, 'info'),
            'formats': formats,
            'uploader': uploader or traverse_obj(live_info_view, 'name'),
            'uploader_id': video_id,
            'thumbnail': traverse_obj(live_info_view, 'thumb'),
            'is_live': True,
        }