youtube_dl/extractor/dramafever.py

   1 # encoding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import itertools
   5
   6 from .amp import AMPIE
   7 from ..compat import (
   8     compat_HTTPError,
   9     compat_urllib_parse,
  10     compat_urlparse,
  11 )
  12 from ..utils import (
  13     ExtractorError,
  14     clean_html,
  15     sanitized_Request,
  16 )
  17
  18
  19 class DramaFeverBaseIE(AMPIE):
  20     _LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
  21     _NETRC_MACHINE = 'dramafever'
  22
  23     _CONSUMER_SECRET = 'DA59dtVXYLxajktV'
  24
  25     _consumer_secret = None
  26
  27     def _get_consumer_secret(self):
  28         mainjs = self._download_webpage(
  29             'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js',
  30             None, 'Downloading main.js', fatal=False)
  31         if not mainjs:
  32             return self._CONSUMER_SECRET
  33         return self._search_regex(
  34             r"var\s+cs\s*=\s*'([^']+)'", mainjs,
  35             'consumer secret', default=self._CONSUMER_SECRET)
  36
  37     def _real_initialize(self):
  38         self._login()
  39         self._consumer_secret = self._get_consumer_secret()
  40
  41     def _login(self):
  42         (username, password) = self._get_login_info()
  43         if username is None:
  44             return
  45
  46         login_form = {
  47             'username': username,
  48             'password': password,
  49         }
  50
  51         request = sanitized_Request(
  52             self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
  53         response = self._download_webpage(
  54             request, None, 'Logging in as %s' % username)
  55
  56         if all(logout_pattern not in response
  57                for logout_pattern in ['href="/accounts/logout/"', '>Log out<']):
  58             error = self._html_search_regex(
  59                 r'(?s)class="hidden-xs prompt"[^>]*>(.+?)<',
  60                 response, 'error message', default=None)
  61             if error:
  62                 raise ExtractorError('Unable to login: %s' % error, expected=True)
  63             raise ExtractorError('Unable to log in')
  64
  65
  66 class DramaFeverIE(DramaFeverBaseIE):
  67     IE_NAME = 'dramafever'
  68     _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
  69     _TEST = {
  70         'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',
  71         'info_dict': {
  72             'id': '4512.1',
  73             'ext': 'flv',
  74             'title': 'Cooking with Shin 4512.1',
  75             'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0',
  76             'thumbnail': 're:^https?://.*\.jpg',
  77             'timestamp': 1404336058,
  78             'upload_date': '20140702',
  79             'duration': 343,
  80         },
  81         'params': {
  82             # m3u8 download
  83             'skip_download': True,
  84         },
  85     }
  86
  87     def _real_extract(self, url):
  88         video_id = self._match_id(url).replace('/', '.')
  89
  90         try:
  91             info = self._extract_feed_info(
  92                 'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)
  93         except ExtractorError as e:
  94             if isinstance(e.cause, compat_HTTPError):
  95                 raise ExtractorError(
  96                     'Currently unavailable in your country.', expected=True)
  97             raise
  98
  99         series_id, episode_number = video_id.split('.')
 100         episode_info = self._download_json(
 101             # We only need a single episode info, so restricting page size to one episode
 102             # and dealing with page number as with episode number
 103             r'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_number=%s&page_size=1'
 104             % (self._consumer_secret, series_id, episode_number),
 105             video_id, 'Downloading episode info JSON', fatal=False)
 106         if episode_info:
 107             value = episode_info.get('value')
 108             if value:
 109                 subfile = value[0].get('subfile') or value[0].get('new_subfile')
 110                 if subfile and subfile != 'http://www.dramafever.com/st/':
 111                     info.setdefault('subtitles', {}).setdefault('English', []).append({
 112                         'ext': 'srt',
 113                         'url': subfile,
 114                     })
 115
 116         return info
 117
 118
 119 class DramaFeverSeriesIE(DramaFeverBaseIE):
 120     IE_NAME = 'dramafever:series'
 121     _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$'
 122     _TESTS = [{
 123         'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/',
 124         'info_dict': {
 125             'id': '4512',
 126             'title': 'Cooking with Shin',
 127             'description': 'md5:84a3f26e3cdc3fb7f500211b3593b5c1',
 128         },
 129         'playlist_count': 4,
 130     }, {
 131         'url': 'http://www.dramafever.com/drama/124/IRIS/',
 132         'info_dict': {
 133             'id': '124',
 134             'title': 'IRIS',
 135             'description': 'md5:b3a30e587cf20c59bd1c01ec0ee1b862',
 136         },
 137         'playlist_count': 20,
 138     }]
 139
 140     _PAGE_SIZE = 60  # max is 60 (see http://api.drama9.com/#get--api-4-episode-series-)
 141
 142     def _real_extract(self, url):
 143         series_id = self._match_id(url)
 144
 145         series = self._download_json(
 146             'http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s'
 147             % (self._consumer_secret, series_id),
 148             series_id, 'Downloading series JSON')['series'][series_id]
 149
 150         title = clean_html(series['name'])
 151         description = clean_html(series.get('description') or series.get('description_short'))
 152
 153         entries = []
 154         for page_num in itertools.count(1):
 155             episodes = self._download_json(
 156                 'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d'
 157                 % (self._consumer_secret, series_id, self._PAGE_SIZE, page_num),
 158                 series_id, 'Downloading episodes JSON page #%d' % page_num)
 159             for episode in episodes.get('value', []):
 160                 episode_url = episode.get('episode_url')
 161                 if not episode_url:
 162                     continue
 163                 entries.append(self.url_result(
 164                     compat_urlparse.urljoin(url, episode_url),
 165                     'DramaFever', episode.get('guid')))
 166             if page_num == episodes['num_pages']:
 167                 break
 168
 169         return self.playlist_result(entries, series_id, title, description)