]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/fc2.py
[utils] `is_html`: Handle double BOM
[yt-dlp.git] / yt_dlp / extractor / fc2.py
CommitLineData
15dfb392
LNO
1import re
2
8e71456a 3from .common import InfoExtractor
1cc79574 4from ..compat import (
cf0efe96 5 compat_parse_qs,
8e71456a 6)
9b8ee23b 7from ..dependencies import websockets
1cc79574
PH
8from ..utils import (
9 ExtractorError,
15dfb392 10 WebSocketsWrapper,
15dfb392 11 js_to_json,
5c2266df 12 sanitized_Request,
15dfb392 13 std_headers,
d6bc443b 14 traverse_obj,
15dfb392 15 update_url_query,
6e6bc8da 16 urlencode_postdata,
d6bc443b 17 urljoin,
1cc79574 18)
8e71456a
PH
19
20
class FC2IE(InfoExtractor):
    """Extractor for video.fc2.com content pages and internal ``fc2:<id>`` URLs."""

    _VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
    IE_NAME = 'fc2'
    _NETRC_MACHINE = 'fc2'
    _TESTS = [{
        'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
        'md5': 'a6ebe8ebe0396518689d963774a54eb7',
        'info_dict': {
            'id': '20121103kUan1KHs',
            'ext': 'flv',
            'title': 'Boxing again with Puff',
        },
    }, {
        'url': 'http://video.fc2.com/en/content/20150125cEva0hDn/',
        'info_dict': {
            'id': '20150125cEva0hDn',
            'ext': 'mp4',
        },
        'params': {
            'username': 'ytdl@yt-dl.org',
            'password': '(snip)',
        },
        'skip': 'requires actual password',
    }, {
        'url': 'http://video.fc2.com/en/a/content/20130926eZpARwsF',
        'only_matching': True,
    }]

    def _login(self):
        """Log in with the configured credentials.

        Returns False when no credentials are available or the login page
        does not report success, True once the login redirect was fetched.
        """
        username, password = self._get_login_info()
        if None in (username, password):
            return False

        # Submit the login form
        form_payload = urlencode_postdata({
            'email': username,
            'password': password,
            'done': 'video',
            'Submit': ' Login ',
        })
        login_request = sanitized_Request(
            'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', form_payload)
        response_page = self._download_webpage(
            login_request, None, note='Logging in', errnote='Unable to log in')

        if 'mode=redirect&login=done' not in response_page:
            self.report_warning('unable to log in: bad username or password')
            return False

        # this follow-up request is also needed
        self._download_webpage(
            sanitized_Request('http://id.fc2.com/?mode=redirect&login=done'), None,
            note='Login redirect', errnote='Login redirect failed')
        return True

    def _real_extract(self, url):
        """Build the info dict for a video from its page and the playlist API.

        For ``fc2:`` URLs no webpage is downloaded, so title, thumbnail and
        description stay None. Raises ExtractorError when the playlist API
        response carries no ('playlist', 'nq') entry.
        """
        video_id = self._match_id(url)
        self._login()

        webpage = None
        if not url.startswith('fc2:'):
            webpage = self._download_webpage(url, video_id)
            self._downloader.cookiejar.clear_session_cookies()  # must clear
            self._login()

        page_meta = {'title': None, 'thumbnail': None, 'description': None}
        if webpage is not None:
            title_patterns = (
                r'<h2\s+class="videoCnt_title">([^<]+?)</h2>',
                r'\s+href="[^"]+"\s*title="([^"]+?)"\s*rel="nofollow">\s*<img',
                # there's two matches in the webpage
                r'\s+href="[^"]+"\s*title="([^"]+?)"\s*rel="nofollow">\s*\1',
            )
            page_meta['title'] = self._html_search_regex(
                title_patterns, webpage, 'title', fatal=False)
            page_meta['thumbnail'] = self._og_search_thumbnail(webpage)
            page_meta['description'] = self._og_search_description(webpage, default=None)

        api_url = 'https://video.fc2.com/api/v3/videoplaylist/%s?sh=1&fs=0' % video_id
        playlist_info = self._download_json(api_url, video_id, note='Downloading info page')
        stream_path = traverse_obj(playlist_info, ('playlist', 'nq'))
        if not stream_path:
            raise ExtractorError('Unable to extract video URL')

        return {
            'id': video_id,
            'title': page_meta['title'],
            # the API value is resolved against the site root
            'url': urljoin('https://video.fc2.com/', stream_path),
            'ext': 'mp4',
            'protocol': 'm3u8_native',
            'description': page_meta['description'],
            'thumbnail': page_meta['thumbnail'],
        }
cf0efe96
YCH
116
117
class FC2EmbedIE(InfoExtractor):
    """Extractor for FC2 ``flv2.swf`` embed URLs.

    The video id, title and thumbnail server are read from the URL query
    string; the actual extraction is delegated to FC2IE through a
    ``url_transparent`` result pointing at ``fc2:<id>``.
    """

    _VALID_URL = r'https?://video\.fc2\.com/flv2\.swf\?(?P<query>.+)'
    IE_NAME = 'fc2:embed'

    _TEST = {
        'url': 'http://video.fc2.com/flv2.swf?t=201404182936758512407645&i=20130316kwishtfitaknmcgd76kjd864hso93htfjcnaogz629mcgfs6rbfk0hsycma7shkf85937cbchfygd74&i=201403223kCqB3Ez&d=2625&sj=11&lang=ja&rel=1&from=11&cmt=1&tk=TlRBM09EQTNNekU9&tl=プリズン・ブレイク%20S1-01%20マイケル%20【吹替】',
        'md5': 'b8aae5334cb691bdb1193a88a6ab5d5a',
        'info_dict': {
            'id': '201403223kCqB3Ez',
            'ext': 'flv',
            'title': 'プリズン・ブレイク S1-01 マイケル 【吹替】',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        """Turn an embed URL into a ``url_transparent`` result for FC2IE.

        Raises ExtractorError when the query string has no ``i`` parameter,
        since the video id cannot be determined without it.
        """
        mobj = self._match_valid_url(url)
        query = compat_parse_qs(mobj.group('query'))

        # `i` may appear several times in the query; the last occurrence is used as the id
        video_ids = query.get('i')
        if not video_ids:
            raise ExtractorError('Unable to find the video id ("i" parameter) in the embed URL')
        video_id = video_ids[-1]
        title = query.get('tl', ['FC2 video %s' % video_id])[0]

        sj = query.get('sj', [None])[0]
        thumbnail = None
        if sj:
            # See thumbnailImagePath() in ServerConst.as of flv2.swf
            thumbnail = 'http://video%s-thumbnail.fc2.com/up/pic/%s.jpg' % (
                sj, '/'.join((video_id[:6], video_id[6:8], video_id[-2], video_id[-1], video_id)))

        return {
            '_type': 'url_transparent',
            'ie_key': FC2IE.ie_key(),
            'url': 'fc2:%s' % video_id,
            'title': title,
            'thumbnail': thumbnail,
        }
15dfb392
LNO
154
155
class FC2LiveIE(InfoExtractor):
    """Extractor for live.fc2.com live streams.

    The HLS playlist URLs are requested over a WebSocket obtained from the
    control server API; the open socket is attached to every format under
    the ``ws`` key together with the ``fc2_live`` protocol.
    """

    _VALID_URL = r'https?://live\.fc2\.com/(?P<id>\d+)'
    IE_NAME = 'fc2:live'

    _TESTS = [{
        'url': 'https://live.fc2.com/57892267/',
        'info_dict': {
            'id': '57892267',
            'title': 'どこまで・・・',
            'uploader': 'あつあげ',
            'uploader_id': '57892267',
            'thumbnail': r're:https?://.+fc2.+',
        },
        'skip': 'livestream',
    }]

    def _real_extract(self, url):
        """Extract formats and metadata for a live channel.

        Raises ExtractorError when the websockets library is missing or no
        HLS playlist information was received over the WebSocket.
        """
        if not websockets:
            raise ExtractorError('websockets library is not available. Please install it.', expected=True)
        video_id = self._match_id(url)
        webpage = self._download_webpage('https://live.fc2.com/%s/' % video_id, video_id)

        self._set_cookie('live.fc2.com', 'js-player_size', '1')

        member_api = self._download_json(
            'https://live.fc2.com/api/memberApi.php', video_id, data=urlencode_postdata({
                'channel': '1',
                'profile': '1',
                'user': '1',
                'streamid': video_id
            }), note='Requesting member info')

        control_server = self._download_json(
            'https://live.fc2.com/api/getControlServer.php', video_id, note='Downloading ControlServer data',
            data=urlencode_postdata({
                'channel_id': video_id,
                'mode': 'play',
                'orz': '',
                'channel_version': member_api['data']['channel_data']['version'],
                'client_version': '2.1.0\n [1]',
                'client_type': 'pc',
                'client_app': 'browser_hls',
                'ipv6': '',
            }), headers={'X-Requested-With': 'XMLHttpRequest'})
        self._set_cookie('live.fc2.com', 'l_ortkn', control_server['orz_raw'])

        ws_url = update_url_query(control_server['url'], {'control_token': control_server['control_token']})
        playlist_data = None

        self.to_screen('%s: Fetching HLS playlist info via WebSocket' % video_id)
        ws = WebSocketsWrapper(ws_url, {
            # [12:] drops the first 12 characters of the stringified cookies.
            # NOTE(review): presumably the leading 'Set-Cookie: ' prefix; with
            # several cookies the later ones may keep theirs - confirm.
            'Cookie': str(self._get_cookies('https://live.fc2.com/'))[12:],
            'Origin': 'https://live.fc2.com',
            'Accept': '*/*',
            'User-Agent': std_headers['User-Agent'],
        })

        self.write_debug('[debug] Sending HLS server request')

        # Wait for the 'connect_complete' message before asking for HLS info;
        # empty or non-dict messages are skipped
        while True:
            recv = ws.recv()
            if not recv:
                continue
            data = self._parse_json(recv, video_id, fatal=False)
            if not data or not isinstance(data, dict):
                continue

            if data.get('name') == 'connect_complete':
                break
        ws.send(r'{"name":"get_hls_information","arguments":{},"id":1}')

        # Read until the response matching the request id sent above arrives
        while True:
            recv = ws.recv()
            if not recv:
                continue
            data = self._parse_json(recv, video_id, fatal=False)
            if not data or not isinstance(data, dict):
                continue
            if data.get('name') == '_response_' and data.get('id') == 1:
                self.write_debug('[debug] Goodbye.')
                playlist_data = data
                break
            elif self._downloader.params.get('verbose', False):
                # Unrelated messages are only echoed (truncated) in verbose mode
                if len(recv) > 100:
                    recv = recv[:100] + '...'
                self.to_screen('[debug] Server said: %s' % recv)

        if not playlist_data:
            raise ExtractorError('Unable to fetch HLS playlist info via WebSocket')

        formats = []
        for name, playlists in playlist_data['arguments'].items():
            if not isinstance(playlists, list):
                continue
            for pl in playlists:
                # An entry without a URL is skipped instead of raising TypeError
                if pl.get('status') == 0 and 'master_playlist' in (pl.get('url') or ''):
                    formats.extend(self._extract_m3u8_formats(
                        pl['url'], video_id, ext='mp4', m3u8_id=name, live=True,
                        headers={
                            'Origin': 'https://live.fc2.com',
                            'Referer': url,
                        }))

        self._sort_formats(formats)
        # Every format carries the open WebSocket along with the protocol name
        for fmt in formats:
            fmt.update({
                'protocol': 'fc2_live',
                'ws': ws,
            })

        title = self._html_search_meta(('og:title', 'twitter:title'), webpage, 'live title', fatal=False)
        if not title:
            title = self._html_extract_title(webpage, 'html title', fatal=False)
            if title:
                # remove service name in <title>
                title = re.sub(r'\s+-\s+.+$', '', title)
        uploader = None
        if title:
            # A title of the form "<title> [<uploader>]" is split into its two parts
            match = self._search_regex(r'^(.+?)\s*\[(.+?)\]$', title, 'title and uploader', default=None, group=(1, 2))
            if match and all(match):
                title, uploader = match

        live_info_view = self._search_regex(r'(?s)liveInfoView\s*:\s*({.+?}),\s*premiumStateView', webpage, 'user info', fatal=False) or None
        if live_info_view:
            # remove jQuery code from object literal
            live_info_view = re.sub(r'\$\(.+?\)[^,]+,', '"",', live_info_view)
            # This object only supplies fallback metadata, so a parse failure
            # must not abort an extraction that already has its formats
            live_info_view = self._parse_json(js_to_json(live_info_view), video_id, fatal=False)

        return {
            'id': video_id,
            'title': title or traverse_obj(live_info_view, 'title'),
            'description': self._html_search_meta(
                ('og:description', 'twitter:description'),
                webpage, 'live description', fatal=False) or traverse_obj(live_info_view, 'info'),
            'formats': formats,
            'uploader': uploader or traverse_obj(live_info_view, 'name'),
            'uploader_id': video_id,
            'thumbnail': traverse_obj(live_info_view, 'thumb'),
            'is_live': True,
        }