yt_dlp/extractor/fc2.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..compat import (
   6     compat_parse_qs,
   7 )
   8 from ..utils import (
   9     ExtractorError,
  10     sanitized_Request,
  11     traverse_obj,
  12     urlencode_postdata,
  13     urljoin,
  14 )
  15
  16
  17 class FC2IE(InfoExtractor):
  18     _VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
  19     IE_NAME = 'fc2'
  20     _NETRC_MACHINE = 'fc2'
  21     _TESTS = [{
  22         'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
  23         'md5': 'a6ebe8ebe0396518689d963774a54eb7',
  24         'info_dict': {
  25             'id': '20121103kUan1KHs',
  26             'ext': 'flv',
  27             'title': 'Boxing again with Puff',
  28         },
  29     }, {
  30         'url': 'http://video.fc2.com/en/content/20150125cEva0hDn/',
  31         'info_dict': {
  32             'id': '20150125cEva0hDn',
  33             'ext': 'mp4',
  34         },
  35         'params': {
  36             'username': 'ytdl@yt-dl.org',
  37             'password': '(snip)',
  38         },
  39         'skip': 'requires actual password',
  40     }, {
  41         'url': 'http://video.fc2.com/en/a/content/20130926eZpARwsF',
  42         'only_matching': True,
  43     }]
  44
  45     def _login(self):
  46         username, password = self._get_login_info()
  47         if username is None or password is None:
  48             return False
  49
  50         # Log in
  51         login_form_strs = {
  52             'email': username,
  53             'password': password,
  54             'done': 'video',
  55             'Submit': ' Login ',
  56         }
  57
  58         login_data = urlencode_postdata(login_form_strs)
  59         request = sanitized_Request(
  60             'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
  61
  62         login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in')
  63         if 'mode=redirect&login=done' not in login_results:
  64             self.report_warning('unable to log in: bad username or password')
  65             return False
  66
  67         # this is also needed
  68         login_redir = sanitized_Request('http://id.fc2.com/?mode=redirect&login=done')
  69         self._download_webpage(
  70             login_redir, None, note='Login redirect', errnote='Login redirect failed')
  71
  72         return True
  73
  74     def _real_extract(self, url):
  75         video_id = self._match_id(url)
  76         self._login()
  77         webpage = None
  78         if not url.startswith('fc2:'):
  79             webpage = self._download_webpage(url, video_id)
  80             self._downloader.cookiejar.clear_session_cookies()  # must clear
  81             self._login()
  82
  83         title, thumbnail, description = None, None, None
  84         if webpage is not None:
  85             title = self._html_search_regex(
  86                 (r'<h2\s+class="videoCnt_title">([^<]+?)</h2>',
  87                  r'\s+href="[^"]+"\s*title="([^"]+?)"\s*rel="nofollow">\s*<img',
  88                  # there's two matches in the webpage
  89                  r'\s+href="[^"]+"\s*title="([^"]+?)"\s*rel="nofollow">\s*\1'),
  90                 webpage,
  91                 'title', fatal=False)
  92             thumbnail = self._og_search_thumbnail(webpage)
  93             description = self._og_search_description(webpage, default=None)
  94
  95         vidplaylist = self._download_json(
  96             'https://video.fc2.com/api/v3/videoplaylist/%s?sh=1&fs=0' % video_id, video_id,
  97             note='Downloading info page')
  98         vid_url = traverse_obj(vidplaylist, ('playlist', 'nq'))
  99         if not vid_url:
 100             raise ExtractorError('Unable to extract video URL')
 101         vid_url = urljoin('https://video.fc2.com/', vid_url)
 102
 103         return {
 104             'id': video_id,
 105             'title': title,
 106             'url': vid_url,
 107             'ext': 'mp4',
 108             'protocol': 'm3u8_native',
 109             'description': description,
 110             'thumbnail': thumbnail,
 111         }
 112
 113
 114 class FC2EmbedIE(InfoExtractor):
 115     _VALID_URL = r'https?://video\.fc2\.com/flv2\.swf\?(?P<query>.+)'
 116     IE_NAME = 'fc2:embed'
 117
 118     _TEST = {
 119         'url': 'http://video.fc2.com/flv2.swf?t=201404182936758512407645&i=20130316kwishtfitaknmcgd76kjd864hso93htfjcnaogz629mcgfs6rbfk0hsycma7shkf85937cbchfygd74&i=201403223kCqB3Ez&d=2625&sj=11&lang=ja&rel=1&from=11&cmt=1&tk=TlRBM09EQTNNekU9&tl=プリズン･ブレイク%20S1-01%20マイケル%20【吹替】',
 120         'md5': 'b8aae5334cb691bdb1193a88a6ab5d5a',
 121         'info_dict': {
 122             'id': '201403223kCqB3Ez',
 123             'ext': 'flv',
 124             'title': 'プリズン･ブレイク S1-01 マイケル 【吹替】',
 125             'thumbnail': r're:^https?://.*\.jpg$',
 126         },
 127     }
 128
 129     def _real_extract(self, url):
 130         mobj = self._match_valid_url(url)
 131         query = compat_parse_qs(mobj.group('query'))
 132
 133         video_id = query['i'][-1]
 134         title = query.get('tl', ['FC2 video %s' % video_id])[0]
 135
 136         sj = query.get('sj', [None])[0]
 137         thumbnail = None
 138         if sj:
 139             # See thumbnailImagePath() in ServerConst.as of flv2.swf
 140             thumbnail = 'http://video%s-thumbnail.fc2.com/up/pic/%s.jpg' % (
 141                 sj, '/'.join((video_id[:6], video_id[6:8], video_id[-2], video_id[-1], video_id)))
 142
 143         return {
 144             '_type': 'url_transparent',
 145             'ie_key': FC2IE.ie_key(),
 146             'url': 'fc2:%s' % video_id,
 147             'title': title,
 148             'thumbnail': thumbnail,
 149         }