[yt-dlp.git] / youtube_dl / extractor / escapist.py

from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import (
    compat_urllib_parse,
)
from ..utils import (
    ExtractorError,
    js_to_json,
)


class EscapistIE(InfoExtractor):
    """Information extractor for video pages on escapistmagazine.com."""

    _VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
    _TEST = {
        'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
        'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
        'info_dict': {
            'id': '6618',
            'ext': 'mp4',
            'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
            'uploader_id': 'the-escapist-presents',
            'uploader': 'The Escapist Presents',
            'title': "Breaking Down Baldur's Gate",
            # Raw string: same value as before, but without relying on an
            # unrecognised '\.' escape in a normal string literal.
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for a single Escapist video page.

        Downloads the page at ``url``, scrapes uploader, description and
        title from it, locates the player configuration URL in the
        flashvars and downloads that configuration to obtain the media URL.
        A second configuration (same URL with ``hq=1``) is tried on a
        best-effort basis to add a higher-quality format.

        Returns a dict with the keys ``id``, ``formats``, ``uploader``,
        ``uploader_id``, ``title``, ``thumbnail`` and ``description``.

        Raises ExtractorError if the normal-quality configuration contains
        no usable video entry; errors raised by the base-class helpers
        propagate unchanged.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Uploader details are optional (fatal=False is passed for both).
        uploader_id = self._html_search_regex(
            r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'",
            webpage, 'uploader ID', fatal=False)
        uploader = self._html_search_regex(
            r"<h1\s+class='headline'>(.*?)</a>",
            webpage, 'uploader', fatal=False)
        description = self._html_search_meta('description', webpage)

        raw_title = self._html_search_meta('title', webpage, fatal=True)
        # The title is the part after the first ' : '. partition() returns
        # '' for that part when the separator is absent, so fall back to the
        # whole meta value instead of producing an empty title.
        title = raw_title.partition(' : ')[2] or raw_title

        # The config URL is embedded (percent-encoded) in the flashvars,
        # either in a <param> element or in an HTML-escaped attribute.
        config_url = compat_urllib_parse.unquote(self._html_search_regex(
            r'''(?x)
            (?:
                <param\s+name="flashvars".*?\s+value="config=|
                flashvars=&quot;config=
            )
            (https?://[^"&]+)
            ''',
            webpage, 'config URL'))

        formats = []

        def _add_format(name, cfgurl, quality):
            """Download the config at cfgurl and append its video format.

            Raises ExtractorError when the playlist has no 'Video' entry
            carrying a URL, so callers can treat that like any other
            extraction failure.
            """
            # The response is run through js_to_json before being parsed.
            config = self._download_json(
                cfgurl, video_id,
                'Downloading ' + name + ' configuration',
                'Unable to download ' + name + ' configuration',
                transform_source=js_to_json)

            playlist = config['playlist']
            # Use a default so an empty result does not leak StopIteration,
            # which the ExtractorError handler below would not catch.
            video_url = next(
                (p['url'] for p in playlist
                 if p.get('eventCategory') == 'Video' and p.get('url')),
                None)
            if video_url is None:
                raise ExtractorError(
                    'No video entry found in ' + name + ' configuration')
            formats.append({
                'url': video_url,
                'format_id': name,
                'quality': quality,
            })

        _add_format('normal', config_url, quality=0)
        # Append the hq flag with the separator matching whether the URL
        # already carries a query string.
        hq_url = config_url + ('&hq=1' if '?' in config_url else '?hq=1')
        try:
            _add_format('hq', hq_url, quality=1)
        except ExtractorError:
            pass  # That's fine, we'll just use normal quality

        self._sort_formats(formats)

        return {
            'id': video_id,
            'formats': formats,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': description,
        }
Commit	Line	Data
6f90d098 PH	1	from __future__ import unicode_literals
6f90d098 PH	2
15369766	3	from .common import InfoExtractor
1cc79574	4	from ..compat import (
15369766	5	compat_urllib_parse,
1cc79574 PH	6	)
1cc79574 PH	7	from ..utils import (
15369766	8	ExtractorError,
596ac6e3	9	js_to_json,
15369766 PH	10	)
	11
	12
	13	class EscapistIE(InfoExtractor):
596ac6e3	14	_VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
6f5ac90c	15	_TEST = {
6f90d098 PH	16	'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
	17	'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
	18	'info_dict': {
	19	'id': '6618',
	20	'ext': 'mp4',
	21	'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
596ac6e3 PH	22	'uploader_id': 'the-escapist-presents',
596ac6e3 PH	23	'uploader': 'The Escapist Presents',
6f90d098	24	'title': "Breaking Down Baldur's Gate",
3da0db62	25	'thumbnail': 're:^https?://.*\.jpg$',
6f5ac90c PH	26	}
6f5ac90c PH	27	}
15369766 PH	28
15369766 PH	29	def _real_extract(self, url):
596ac6e3	30	video_id = self._match_id(url)
6f90d098	31	webpage = self._download_webpage(url, video_id)
15369766	32
596ac6e3	33	uploader_id = self._html_search_regex(
c010af6f	34	r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'",
596ac6e3 PH	35	webpage, 'uploader ID', fatal=False)
596ac6e3 PH	36	uploader = self._html_search_regex(
c010af6f	37	r"<h1\s+class='headline'>(.*?)</a>",
596ac6e3 PH	38	webpage, 'uploader', fatal=False)
596ac6e3 PH	39	description = self._html_search_meta('description', webpage)
15369766	40
596ac6e3 PH	41	raw_title = self._html_search_meta('title', webpage, fatal=True)
596ac6e3 PH	42	title = raw_title.partition(' : ')[2]
15369766	43
3da0db62	44	config_url = compat_urllib_parse.unquote(self._html_search_regex(
54233c90 PH	45	r'''(?x)
54233c90 PH	46	(?:
1c69bca2	47	<param\s+name="flashvars".*?\s+value="config=|
54233c90 PH	48	flashvars=&quot;config=
54233c90 PH	49	)
1c69bca2	50	(https?://[^"&]+)
54233c90 PH	51	''',
54233c90 PH	52	webpage, 'config URL'))
15369766	53
100959a6	54	formats = []
15369766	55
6f90d098 PH	56	def _add_format(name, cfgurl, quality):
	57	config = self._download_json(
	58	cfgurl, video_id,
	59	'Downloading ' + name + ' configuration',
	60	'Unable to download ' + name + ' configuration',
596ac6e3	61	transform_source=js_to_json)
15369766	62
100959a6	63	playlist = config['playlist']
9650885b PH	64	video_url = next(
	65	p['url'] for p in playlist
	66	if p.get('eventCategory') == 'Video')
100959a6	67	formats.append({
9650885b	68	'url': video_url,
100959a6	69	'format_id': name,
6f90d098	70	'quality': quality,
100959a6	71	})
15369766	72
596ac6e3 PH	73	_add_format('normal', config_url, quality=0)
	74	hq_url = (config_url +
	75	('&hq=1' if '?' in config_url else config_url + '?hq=1'))
100959a6	76	try:
6f90d098	77	_add_format('hq', hq_url, quality=1)
15ff3c83 PH	78	except ExtractorError:
15ff3c83 PH	79	pass # That's fine, we'll just use normal quality
100959a6	80
6f90d098 PH	81	self._sort_formats(formats)
6f90d098 PH	82
100959a6	83	return {
6f90d098	84	'id': video_id,
100959a6	85	'formats': formats,
596ac6e3 PH	86	'uploader': uploader,
596ac6e3 PH	87	'uploader_id': uploader_id,
15369766	88	'title': title,
46720279	89	'thumbnail': self._og_search_thumbnail(webpage),
596ac6e3	90	'description': description,
15369766	91	}