[yt-dlp.git] / youtube_dl / extractor / dramafever.py

# encoding: utf-8
from __future__ import unicode_literals

import itertools

from .amp import AMPIE
from ..compat import (
    compat_HTTPError,
    compat_urllib_parse,
    compat_urlparse,
)
from ..utils import (
    ExtractorError,
    clean_html,
    int_or_none,
    sanitized_Request,
)


class DramaFeverBaseIE(AMPIE):
    """Shared login and consumer-secret handling for DramaFever extractors."""

    _LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
    _NETRC_MACHINE = 'dramafever'

    # Hard-coded fallback used when the secret cannot be read from main.js.
    _CONSUMER_SECRET = 'DA59dtVXYLxajktV'

    # Populated by _real_initialize(); interpolated as the `cs` query
    # parameter of the API URLs built by subclasses.
    _consumer_secret = None

    def _get_consumer_secret(self):
        """Return the consumer secret found in the site's main.js.

        The download is non-fatal; if it yields nothing, or the `cs`
        variable cannot be matched, the hard-coded _CONSUMER_SECRET is
        returned instead.
        """
        fallback_secret = self._CONSUMER_SECRET
        script = self._download_webpage(
            'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js',
            None, 'Downloading main.js', fatal=False)
        if script:
            return self._search_regex(
                r"var\s+cs\s*=\s*'([^']+)'", script,
                'consumer secret', default=fallback_secret)
        return fallback_secret

    def _real_initialize(self):
        """Log in (when credentials are available), then resolve the secret."""
        self._login()
        self._consumer_secret = self._get_consumer_secret()

    def _login(self):
        """Submit the login form and verify that the session is logged in.

        Does nothing when no username is configured. Raises ExtractorError
        when the response contains neither of the logout markers; the
        error is marked as expected only if the page supplies an error
        message.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        post_data = compat_urllib_parse.urlencode({
            'username': username,
            'password': password,
        }).encode('utf-8')
        response = self._download_webpage(
            sanitized_Request(self._LOGIN_URL, post_data),
            None, 'Logging in as %s' % username)

        # Either logout marker in the response means the login succeeded.
        logout_markers = ('href="/accounts/logout/"', '>Log out<')
        if any(marker in response for marker in logout_markers):
            return

        error = self._html_search_regex(
            r'(?s)class="hidden-xs prompt"[^>]*>(.+?)<',
            response, 'error message', default=None)
        if error:
            raise ExtractorError('Unable to login: %s' % error, expected=True)
        raise ExtractorError('Unable to log in')


class DramaFeverIE(DramaFeverBaseIE):
    """Extractor for a single DramaFever episode page.

    The video id is the '<series>/<episode>' part of the URL with the
    slash replaced by a dot (e.g. '4512.1').
    """

    IE_NAME = 'dramafever'
    _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
    _TEST = {
        'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',
        'info_dict': {
            'id': '4512.1',
            'ext': 'flv',
            'title': 'Cooking with Shin 4512.1',
            'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0',
            # Raw string: a bare backslash-dot in a normal literal is an
            # invalid escape sequence (warning on modern Python 3).
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1404336058,
            'upload_date': '20140702',
            'duration': 343,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the episode at `url`.

        The base info comes from the AMP feed. Subtitles, episode title
        and episode number are then added on a best-effort basis from the
        episode API; that request is non-fatal.

        Raises ExtractorError with a geo-restriction message when the
        feed request fails with an HTTP error; any other ExtractorError
        is re-raised unchanged.
        """
        video_id = self._match_id(url).replace('/', '.')

        try:
            info = self._extract_feed_info(
                'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)
        except ExtractorError as e:
            # Every HTTP error on the feed is reported as a geo block.
            if isinstance(e.cause, compat_HTTPError):
                raise ExtractorError(
                    'Currently unavailable in your country.', expected=True)
            raise

        series_id, episode_number = video_id.split('.')
        episode_info = self._download_json(
            # We only need a single episode info, so restricting page size to one episode
            # and dealing with page number as with episode number
            r'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_number=%s&page_size=1'
            % (self._consumer_secret, series_id, episode_number),
            video_id, 'Downloading episode info JSON', fatal=False)
        if episode_info:
            value = episode_info.get('value')
            if isinstance(value, list):
                for v in value:
                    # This metadata is optional, so skip malformed entries
                    # instead of failing the whole extraction.
                    if not isinstance(v, dict):
                        continue
                    if v.get('type') == 'Episode':
                        subfile = v.get('subfile') or v.get('new_subfile')
                        # The bare '/st/' URL is treated as "no subtitles".
                        if subfile and subfile != 'http://www.dramafever.com/st/':
                            info.setdefault('subtitles', {}).setdefault('English', []).append({
                                'ext': 'srt',
                                'url': subfile,
                            })
                        number = int_or_none(v.get('number'))
                        episode_fallback = 'Episode'
                        if number:
                            episode_fallback += ' %d' % number
                        info['episode'] = v.get('title') or episode_fallback
                        info['episode_number'] = number
                        # Only the first entry of type 'Episode' is used.
                        break

        return info


class DramaFeverSeriesIE(DramaFeverBaseIE):
    """Extractor for a DramaFever series page, returned as a playlist."""

    IE_NAME = 'dramafever:series'
    _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$'
    _TESTS = [{
        'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/',
        'info_dict': {
            'id': '4512',
            'title': 'Cooking with Shin',
            'description': 'md5:84a3f26e3cdc3fb7f500211b3593b5c1',
        },
        'playlist_count': 4,
    }, {
        'url': 'http://www.dramafever.com/drama/124/IRIS/',
        'info_dict': {
            'id': '124',
            'title': 'IRIS',
            'description': 'md5:b3a30e587cf20c59bd1c01ec0ee1b862',
        },
        'playlist_count': 20,
    }]

    _PAGE_SIZE = 60  # max is 60 (see http://api.drama9.com/#get--api-4-episode-series-)

    def _real_extract(self, url):
        """Return a playlist of all episodes of the series at `url`.

        Series title and description come from the series query API.
        Episodes are fetched page by page (_PAGE_SIZE per request) and
        each one with an `episode_url` becomes a url_result handled by
        the 'DramaFever' extractor.
        """
        series_id = self._match_id(url)

        series = self._download_json(
            'http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s'
            % (self._consumer_secret, series_id),
            series_id, 'Downloading series JSON')['series'][series_id]

        title = clean_html(series['name'])
        description = clean_html(series.get('description') or series.get('description_short'))

        entries = []
        for page_num in itertools.count(1):
            episodes = self._download_json(
                'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d'
                % (self._consumer_secret, series_id, self._PAGE_SIZE, page_num),
                series_id, 'Downloading episodes JSON page #%d' % page_num)
            # `or []` also covers an explicit null 'value' in the response.
            for episode in episodes.get('value') or []:
                episode_url = episode.get('episode_url')
                if not episode_url:
                    continue
                entries.append(self.url_result(
                    compat_urlparse.urljoin(url, episode_url),
                    'DramaFever', episode.get('guid')))
            # Use >= rather than ==: a reported page count of 0 (or any
            # count below the current page) would otherwise never match
            # and the loop would keep requesting pages.
            if page_num >= episodes['num_pages']:
                break

        return self.playlist_result(entries, series_id, title, description)
Commit	Line	Data
f670ef1c	1	# encoding: utf-8
	2	from __future__ import unicode_literals
	3
0029071a	4	import itertools
f670ef1c	5
3793090b	6	from .amp import AMPIE
0029071a S	7	from ..compat import (
0029071a S	8	compat_HTTPError,
cbcd1a54	9	compat_urllib_parse,
0029071a S	10	compat_urlparse,
	11	)
	12	from ..utils import (
	13	ExtractorError,
	14	clean_html,
8f4c56f3	15	int_or_none,
5c2266df	16	sanitized_Request,
0029071a	17	)
f670ef1c	18
f670ef1c	19
3793090b	20	class DramaFeverBaseIE(AMPIE):
cbcd1a54 S	21	_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
	22	_NETRC_MACHINE = 'dramafever'
	23
1d1dd597 S	24	_CONSUMER_SECRET = 'DA59dtVXYLxajktV'
	25
	26	_consumer_secret = None
	27
	28	def _get_consumer_secret(self):
	29	mainjs = self._download_webpage(
	30	'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js',
	31	None, 'Downloading main.js', fatal=False)
	32	if not mainjs:
	33	return self._CONSUMER_SECRET
	34	return self._search_regex(
	35	r"var\s+cs\s=\s'([^']+)'", mainjs,
	36	'consumer secret', default=self._CONSUMER_SECRET)
	37
cbcd1a54 S	38	def _real_initialize(self):
cbcd1a54 S	39	self._login()
1d1dd597	40	self._consumer_secret = self._get_consumer_secret()
cbcd1a54 S	41
	42	def _login(self):
	43	(username, password) = self._get_login_info()
	44	if username is None:
	45	return
	46
	47	login_form = {
	48	'username': username,
	49	'password': password,
	50	}
	51
5c2266df	52	request = sanitized_Request(
cbcd1a54 S	53	self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
	54	response = self._download_webpage(
	55	request, None, 'Logging in as %s' % username)
	56
	57	if all(logout_pattern not in response
	58	for logout_pattern in ['href="/accounts/logout/"', '>Log out<']):
	59	error = self._html_search_regex(
	60	r'(?s)class="hidden-xs prompt"[^>]*>(.+?)<',
	61	response, 'error message', default=None)
	62	if error:
	63	raise ExtractorError('Unable to login: %s' % error, expected=True)
	64	raise ExtractorError('Unable to log in')
	65
	66
	67	class DramaFeverIE(DramaFeverBaseIE):
f670ef1c	68	IE_NAME = 'dramafever'
450d89dd	69	_VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+/[0-9]+)(?:/\|$)'
0029071a	70	_TEST = {
f670ef1c	71	'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',
	72	'info_dict': {
	73	'id': '4512.1',
	74	'ext': 'flv',
	75	'title': 'Cooking with Shin 4512.1',
0029071a S	76	'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0',
	77	'thumbnail': 're:^https?://.*\.jpg',
	78	'timestamp': 1404336058,
f670ef1c	79	'upload_date': '20140702',
0029071a	80	'duration': 343,
3793090b	81	},
	82	'params': {
	83	# m3u8 download
	84	'skip_download': True,
	85	},
0029071a	86	}
f670ef1c	87
f670ef1c	88	def _real_extract(self, url):
0029071a	89	video_id = self._match_id(url).replace('/', '.')
f670ef1c	90
0029071a	91	try:
c7fa5fa4	92	info = self._extract_feed_info(
c7fa5fa4	93	'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)
0029071a S	94	except ExtractorError as e:
	95	if isinstance(e.cause, compat_HTTPError):
	96	raise ExtractorError(
	97	'Currently unavailable in your country.', expected=True)
	98	raise
f670ef1c	99
1d1dd597 S	100	series_id, episode_number = video_id.split('.')
	101	episode_info = self._download_json(
	102	# We only need a single episode info, so restricting page size to one episode
	103	# and dealing with page number as with episode number
	104	r'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_number=%s&page_size=1'
	105	% (self._consumer_secret, series_id, episode_number),
	106	video_id, 'Downloading episode info JSON', fatal=False)
	107	if episode_info:
	108	value = episode_info.get('value')
1dcc38b2 S	109	if isinstance(value, list):
	110	for v in value:
	111	if v.get('type') == 'Episode':
	112	subfile = v.get('subfile') or v.get('new_subfile')
	113	if subfile and subfile != 'http://www.dramafever.com/st/':
	114	info.setdefault('subtitles', {}).setdefault('English', []).append({
	115	'ext': 'srt',
	116	'url': subfile,
	117	})
bd19aa0e S	118	episode_number = int_or_none(v.get('number'))
	119	episode_fallback = 'Episode'
	120	if episode_number:
	121	episode_fallback += ' %d' % episode_number
a2e51e7b	122	info['episode'] = v.get('title') or episode_fallback
bd19aa0e	123	info['episode_number'] = episode_number
1dcc38b2	124	break
1d1dd597	125
3793090b	126	return info
f670ef1c	127
0029071a	128
cbcd1a54	129	class DramaFeverSeriesIE(DramaFeverBaseIE):
f670ef1c	130	IE_NAME = 'dramafever:series'
70a20023	131	_VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/\|$)).+)?)?$'
f670ef1c	132	_TESTS = [{
	133	'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/',
	134	'info_dict': {
	135	'id': '4512',
	136	'title': 'Cooking with Shin',
0029071a	137	'description': 'md5:84a3f26e3cdc3fb7f500211b3593b5c1',
f670ef1c	138	},
	139	'playlist_count': 4,
	140	}, {
	141	'url': 'http://www.dramafever.com/drama/124/IRIS/',
	142	'info_dict': {
	143	'id': '124',
	144	'title': 'IRIS',
0029071a	145	'description': 'md5:b3a30e587cf20c59bd1c01ec0ee1b862',
f670ef1c	146	},
	147	'playlist_count': 20,
	148	}]
	149
463b2e55	150	_PAGE_SIZE = 60 # max is 60 (see http://api.drama9.com/#get--api-4-episode-series-)
0029071a	151
f670ef1c	152	def _real_extract(self, url):
f670ef1c	153	series_id = self._match_id(url)
0029071a	154
0029071a S	155	series = self._download_json(
0029071a S	156	'http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s'
1d1dd597	157	% (self._consumer_secret, series_id),
0029071a	158	series_id, 'Downloading series JSON')['series'][series_id]
f670ef1c	159
0029071a S	160	title = clean_html(series['name'])
0029071a S	161	description = clean_html(series.get('description') or series.get('description_short'))
f670ef1c	162
f670ef1c	163	entries = []
0029071a S	164	for page_num in itertools.count(1):
	165	episodes = self._download_json(
	166	'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d'
1d1dd597	167	% (self._consumer_secret, series_id, self._PAGE_SIZE, page_num),
0029071a S	168	series_id, 'Downloading episodes JSON page #%d' % page_num)
0029071a S	169	for episode in episodes.get('value', []):
10464af5 S	170	episode_url = episode.get('episode_url')
	171	if not episode_url:
	172	continue
0029071a	173	entries.append(self.url_result(
10464af5	174	compat_urlparse.urljoin(url, episode_url),
0029071a S	175	'DramaFever', episode.get('guid')))
	176	if page_num == episodes['num_pages']:
	177	break
	178
f670ef1c	179	return self.playlist_result(entries, series_id, title, description)