yt_dlp/extractor/fc2.py

   1 import re
   2
   3 from .common import InfoExtractor
   4 from ..compat import (
   5     compat_parse_qs,
   6 )
   7 from ..dependencies import websockets
   8 from ..utils import (
   9     ExtractorError,
  10     WebSocketsWrapper,
  11     js_to_json,
  12     sanitized_Request,
  13     traverse_obj,
  14     update_url_query,
  15     urlencode_postdata,
  16     urljoin,
  17 )
  18
  19
  20 class FC2IE(InfoExtractor):
  21     _VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
  22     IE_NAME = 'fc2'
  23     _NETRC_MACHINE = 'fc2'
  24     _TESTS = [{
  25         'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
  26         'md5': 'a6ebe8ebe0396518689d963774a54eb7',
  27         'info_dict': {
  28             'id': '20121103kUan1KHs',
  29             'ext': 'flv',
  30             'title': 'Boxing again with Puff',
  31         },
  32     }, {
  33         'url': 'http://video.fc2.com/en/content/20150125cEva0hDn/',
  34         'info_dict': {
  35             'id': '20150125cEva0hDn',
  36             'ext': 'mp4',
  37         },
  38         'params': {
  39             'username': 'ytdl@yt-dl.org',
  40             'password': '(snip)',
  41         },
  42         'skip': 'requires actual password',
  43     }, {
  44         'url': 'http://video.fc2.com/en/a/content/20130926eZpARwsF',
  45         'only_matching': True,
  46     }]
  47
  48     def _login(self):
  49         username, password = self._get_login_info()
  50         if username is None or password is None:
  51             return False
  52
  53         # Log in
  54         login_form_strs = {
  55             'email': username,
  56             'password': password,
  57             'done': 'video',
  58             'Submit': ' Login ',
  59         }
  60
  61         login_data = urlencode_postdata(login_form_strs)
  62         request = sanitized_Request(
  63             'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
  64
  65         login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in')
  66         if 'mode=redirect&login=done' not in login_results:
  67             self.report_warning('unable to log in: bad username or password')
  68             return False
  69
  70         # this is also needed
  71         login_redir = sanitized_Request('http://id.fc2.com/?mode=redirect&login=done')
  72         self._download_webpage(
  73             login_redir, None, note='Login redirect', errnote='Login redirect failed')
  74
  75         return True
  76
  77     def _real_extract(self, url):
  78         video_id = self._match_id(url)
  79         self._login()
  80         webpage = None
  81         if not url.startswith('fc2:'):
  82             webpage = self._download_webpage(url, video_id)
  83             self._downloader.cookiejar.clear_session_cookies()  # must clear
  84             self._login()
  85
  86         title, thumbnail, description = None, None, None
  87         if webpage is not None:
  88             title = self._html_search_regex(
  89                 (r'<h2\s+class="videoCnt_title">([^<]+?)</h2>',
  90                  r'\s+href="[^"]+"\s*title="([^"]+?)"\s*rel="nofollow">\s*<img',
  91                  # there's two matches in the webpage
  92                  r'\s+href="[^"]+"\s*title="([^"]+?)"\s*rel="nofollow">\s*\1'),
  93                 webpage,
  94                 'title', fatal=False)
  95             thumbnail = self._og_search_thumbnail(webpage)
  96             description = self._og_search_description(webpage, default=None)
  97
  98         vidplaylist = self._download_json(
  99             'https://video.fc2.com/api/v3/videoplaylist/%s?sh=1&fs=0' % video_id, video_id,
 100             note='Downloading info page')
 101         vid_url = traverse_obj(vidplaylist, ('playlist', 'nq'))
 102         if not vid_url:
 103             raise ExtractorError('Unable to extract video URL')
 104         vid_url = urljoin('https://video.fc2.com/', vid_url)
 105
 106         return {
 107             'id': video_id,
 108             'title': title,
 109             'url': vid_url,
 110             'ext': 'mp4',
 111             'protocol': 'm3u8_native',
 112             'description': description,
 113             'thumbnail': thumbnail,
 114         }
 115
 116
 117 class FC2EmbedIE(InfoExtractor):
 118     _VALID_URL = r'https?://video\.fc2\.com/flv2\.swf\?(?P<query>.+)'
 119     IE_NAME = 'fc2:embed'
 120
 121     _TEST = {
 122         'url': 'http://video.fc2.com/flv2.swf?t=201404182936758512407645&i=20130316kwishtfitaknmcgd76kjd864hso93htfjcnaogz629mcgfs6rbfk0hsycma7shkf85937cbchfygd74&i=201403223kCqB3Ez&d=2625&sj=11&lang=ja&rel=1&from=11&cmt=1&tk=TlRBM09EQTNNekU9&tl=プリズン･ブレイク%20S1-01%20マイケル%20【吹替】',
 123         'md5': 'b8aae5334cb691bdb1193a88a6ab5d5a',
 124         'info_dict': {
 125             'id': '201403223kCqB3Ez',
 126             'ext': 'flv',
 127             'title': 'プリズン･ブレイク S1-01 マイケル 【吹替】',
 128             'thumbnail': r're:^https?://.*\.jpg$',
 129         },
 130     }
 131
 132     def _real_extract(self, url):
 133         mobj = self._match_valid_url(url)
 134         query = compat_parse_qs(mobj.group('query'))
 135
 136         video_id = query['i'][-1]
 137         title = query.get('tl', ['FC2 video %s' % video_id])[0]
 138
 139         sj = query.get('sj', [None])[0]
 140         thumbnail = None
 141         if sj:
 142             # See thumbnailImagePath() in ServerConst.as of flv2.swf
 143             thumbnail = 'http://video%s-thumbnail.fc2.com/up/pic/%s.jpg' % (
 144                 sj, '/'.join((video_id[:6], video_id[6:8], video_id[-2], video_id[-1], video_id)))
 145
 146         return {
 147             '_type': 'url_transparent',
 148             'ie_key': FC2IE.ie_key(),
 149             'url': 'fc2:%s' % video_id,
 150             'title': title,
 151             'thumbnail': thumbnail,
 152         }
 153
 154
 155 class FC2LiveIE(InfoExtractor):
 156     _VALID_URL = r'https?://live\.fc2\.com/(?P<id>\d+)'
 157     IE_NAME = 'fc2:live'
 158
 159     _TESTS = [{
 160         'url': 'https://live.fc2.com/57892267/',
 161         'info_dict': {
 162             'id': '57892267',
 163             'title': 'どこまで・・・',
 164             'uploader': 'あつあげ',
 165             'uploader_id': '57892267',
 166             'thumbnail': r're:https?://.+fc2.+',
 167         },
 168         'skip': 'livestream',
 169     }]
 170
 171     def _real_extract(self, url):
 172         if not websockets:
 173             raise ExtractorError('websockets library is not available. Please install it.', expected=True)
 174         video_id = self._match_id(url)
 175         webpage = self._download_webpage('https://live.fc2.com/%s/' % video_id, video_id)
 176
 177         self._set_cookie('live.fc2.com', 'js-player_size', '1')
 178
 179         member_api = self._download_json(
 180             'https://live.fc2.com/api/memberApi.php', video_id, data=urlencode_postdata({
 181                 'channel': '1',
 182                 'profile': '1',
 183                 'user': '1',
 184                 'streamid': video_id
 185             }), note='Requesting member info')
 186
 187         control_server = self._download_json(
 188             'https://live.fc2.com/api/getControlServer.php', video_id, note='Downloading ControlServer data',
 189             data=urlencode_postdata({
 190                 'channel_id': video_id,
 191                 'mode': 'play',
 192                 'orz': '',
 193                 'channel_version': member_api['data']['channel_data']['version'],
 194                 'client_version': '2.1.0\n [1]',
 195                 'client_type': 'pc',
 196                 'client_app': 'browser_hls',
 197                 'ipv6': '',
 198             }), headers={'X-Requested-With': 'XMLHttpRequest'})
 199         self._set_cookie('live.fc2.com', 'l_ortkn', control_server['orz_raw'])
 200
 201         ws_url = update_url_query(control_server['url'], {'control_token': control_server['control_token']})
 202         playlist_data = None
 203
 204         self.to_screen('%s: Fetching HLS playlist info via WebSocket' % video_id)
 205         ws = WebSocketsWrapper(ws_url, {
 206             'Cookie': str(self._get_cookies('https://live.fc2.com/'))[12:],
 207             'Origin': 'https://live.fc2.com',
 208             'Accept': '*/*',
 209             'User-Agent': self.get_param('http_headers')['User-Agent'],
 210         })
 211
 212         self.write_debug('[debug] Sending HLS server request')
 213
 214         while True:
 215             recv = ws.recv()
 216             if not recv:
 217                 continue
 218             data = self._parse_json(recv, video_id, fatal=False)
 219             if not data or not isinstance(data, dict):
 220                 continue
 221
 222             if data.get('name') == 'connect_complete':
 223                 break
 224         ws.send(r'{"name":"get_hls_information","arguments":{},"id":1}')
 225
 226         while True:
 227             recv = ws.recv()
 228             if not recv:
 229                 continue
 230             data = self._parse_json(recv, video_id, fatal=False)
 231             if not data or not isinstance(data, dict):
 232                 continue
 233             if data.get('name') == '_response_' and data.get('id') == 1:
 234                 self.write_debug('[debug] Goodbye.')
 235                 playlist_data = data
 236                 break
 237             elif self._downloader.params.get('verbose', False):
 238                 if len(recv) > 100:
 239                     recv = recv[:100] + '...'
 240                 self.to_screen('[debug] Server said: %s' % recv)
 241
 242         if not playlist_data:
 243             raise ExtractorError('Unable to fetch HLS playlist info via WebSocket')
 244
 245         formats = []
 246         for name, playlists in playlist_data['arguments'].items():
 247             if not isinstance(playlists, list):
 248                 continue
 249             for pl in playlists:
 250                 if pl.get('status') == 0 and 'master_playlist' in pl.get('url'):
 251                     formats.extend(self._extract_m3u8_formats(
 252                         pl['url'], video_id, ext='mp4', m3u8_id=name, live=True,
 253                         headers={
 254                             'Origin': 'https://live.fc2.com',
 255                             'Referer': url,
 256                         }))
 257
 258         self._sort_formats(formats)
 259         for fmt in formats:
 260             fmt.update({
 261                 'protocol': 'fc2_live',
 262                 'ws': ws,
 263             })
 264
 265         title = self._html_search_meta(('og:title', 'twitter:title'), webpage, 'live title', fatal=False)
 266         if not title:
 267             title = self._html_extract_title(webpage, 'html title', fatal=False)
 268             if title:
 269                 # remove service name in <title>
 270                 title = re.sub(r'\s+-\s+.+$', '', title)
 271         uploader = None
 272         if title:
 273             match = self._search_regex(r'^(.+?)\s*\[(.+?)\]$', title, 'title and uploader', default=None, group=(1, 2))
 274             if match and all(match):
 275                 title, uploader = match
 276
 277         live_info_view = self._search_regex(r'(?s)liveInfoView\s*:\s*({.+?}),\s*premiumStateView', webpage, 'user info', fatal=False) or None
 278         if live_info_view:
 279             # remove jQuery code from object literal
 280             live_info_view = re.sub(r'\$\(.+?\)[^,]+,', '"",', live_info_view)
 281             live_info_view = self._parse_json(js_to_json(live_info_view), video_id)
 282
 283         return {
 284             'id': video_id,
 285             'title': title or traverse_obj(live_info_view, 'title'),
 286             'description': self._html_search_meta(
 287                 ('og:description', 'twitter:description'),
 288                 webpage, 'live description', fatal=False) or traverse_obj(live_info_view, 'info'),
 289             'formats': formats,
 290             'uploader': uploader or traverse_obj(live_info_view, 'name'),
 291             'uploader_id': video_id,
 292             'thumbnail': traverse_obj(live_info_view, 'thumb'),
 293             'is_live': True,
 294         }