]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/escapist.py
Merge remote-tracking branch 'origin/master'
[yt-dlp.git] / youtube_dl / extractor / escapist.py
CommitLineData
6f90d098
PH
1from __future__ import unicode_literals
2
15369766 3from .common import InfoExtractor
1cc79574 4from ..compat import (
15369766 5 compat_urllib_parse,
63a562f9 6 compat_urllib_request,
1cc79574
PH
7)
8from ..utils import (
15369766 9 ExtractorError,
596ac6e3 10 js_to_json,
15369766
PH
11)
12
13
class EscapistIE(InfoExtractor):
    """Extractor for video pages under escapistmagazine.com/videos/view/.

    The video page embeds a player "config" URL; that config is a
    JavaScript-ish document (converted with js_to_json) whose 'playlist'
    entries carry the actual media URLs.
    """

    _VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
    # Sent with the page request, the config requests and (through
    # 'http_headers') recorded on every extracted format.
    _USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
    _TEST = {
        'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
        'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
        'info_dict': {
            'id': '6618',
            'ext': 'mp4',
            'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
            'uploader_id': 'the-escapist-presents',
            'uploader': 'The Escapist Presents',
            'title': "Breaking Down Baldur's Gate",
            # Raw string: '\.' is a regex escape, not a string escape.
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Extract the video (and optionally its postroll ad) from *url*.

        Returns the info dict for the video. When the 'include_ads'
        downloader param is set and postroll formats were found, returns a
        playlist containing the video followed by the postroll instead.

        Raises ExtractorError (expected) when the best format's URL points
        at '/escapist/sales-marketing/', which this extractor treats as the
        site having blocked the requesting IP address.
        """
        video_id = self._match_id(url)
        webpage_req = compat_urllib_request.Request(url)
        webpage_req.add_header('User-Agent', self._USER_AGENT)
        webpage = self._download_webpage(webpage_req, video_id)

        # Uploader metadata is optional (fatal=False).
        uploader_id = self._html_search_regex(
            r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'",
            webpage, 'uploader ID', fatal=False)
        uploader = self._html_search_regex(
            r"<h1\s+class='headline'>(.*?)</a>",
            webpage, 'uploader', fatal=False)
        description = self._html_search_meta('description', webpage)

        # The title is whatever follows the first ' : ' in the <meta> title.
        raw_title = self._html_search_meta('title', webpage, fatal=True)
        title = raw_title.partition(' : ')[2]

        # The config URL is percent-encoded inside the flashvars attribute.
        config_url = compat_urllib_parse.unquote(self._html_search_regex(
            r'''(?x)
            (?:
                <param\s+name="flashvars".*?\s+value="config=|
                flashvars=&quot;config=
            )
            (https?://[^"&]+)
            ''',
            webpage, 'config URL'))

        formats = []
        ad_formats = []

        def _add_format(name, cfg_url, quality):
            """Download the config at *cfg_url* and collect its playlist.

            'Video' entries are appended to ``formats`` and 'Video Postroll'
            entries to ``ad_formats``; entries with any other
            'eventCategory' are ignored. Each collected entry is tagged
            with *name* as its format_id and with *quality*.
            """
            cfg_req = compat_urllib_request.Request(cfg_url)
            cfg_req.add_header('User-Agent', self._USER_AGENT)
            config = self._download_json(
                cfg_req, video_id,
                'Downloading ' + name + ' configuration',
                'Unable to download ' + name + ' configuration',
                transform_source=js_to_json)

            playlist = config['playlist']
            for p in playlist:
                if p.get('eventCategory') == 'Video':
                    ar = formats
                elif p.get('eventCategory') == 'Video Postroll':
                    ar = ad_formats
                else:
                    continue

                ar.append({
                    'url': p['url'],
                    'format_id': name,
                    'quality': quality,
                    'http_headers': {
                        'User-Agent': self._USER_AGENT,
                    },
                })

        _add_format('normal', config_url, quality=0)
        # Append the hq flag with the right separator. Only the separator
        # differs between the two cases; the base URL is added exactly once.
        hq_url = config_url + ('&hq=1' if '?' in config_url else '?hq=1')
        try:
            _add_format('hq', hq_url, quality=1)
        except ExtractorError:
            pass  # That's fine, we'll just use normal quality
        self._sort_formats(formats)

        # NOTE(review): indexes formats[-1] unconditionally; presumably
        # _sort_formats rejects an empty list - confirm.
        if '/escapist/sales-marketing/' in formats[-1]['url']:
            raise ExtractorError('This IP address has been blocked by The Escapist', expected=True)

        res = {
            'id': video_id,
            'formats': formats,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': description,
        }

        if self._downloader.params.get('include_ads') and ad_formats:
            self._sort_formats(ad_formats)
            ad_res = {
                'id': '%s-ad' % video_id,
                'title': '%s (Postroll)' % title,
                'formats': ad_formats,
            }
            return {
                '_type': 'playlist',
                'entries': [res, ad_res],
                'title': title,
                'id': video_id,
            }

        return res