]>
Commit | Line | Data |
---|---|---|
400afdda | 1 | # encoding: utf-8 |
5bb67dbf | 2 | from __future__ import unicode_literals |
d9dd3584 | 3 | |
400afdda | 4 | import re |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..utils import ( | |
8 | ExtractorError, | |
d9dd3584 | 9 | int_or_none, |
400afdda | 10 | ) |
11 | ||
8032e31f | 12 | |
class CinemassacreIE(InfoExtractor):
    """Extractor for dated video posts on cinemassacre.com.

    The post URL carries the publication date and a display id.  The post
    page embeds a player hosted on player.screenwavemedia.com; that player
    page in turn references an SD file URL and a SMIL playlist from which
    the individual formats are built.
    """

    _VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
    _TESTS = [
        {
            'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
            'md5': 'fde81fbafaee331785f58cd6c0d46190',
            'info_dict': {
                'id': '19911',
                'ext': 'mp4',
                'upload_date': '20121110',
                'title': '“Angry Video Game Nerd: The Movie” – Trailer',
                'description': 'md5:fb87405fcb42a331742a0dce2708560b',
            },
        },
        {
            'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
            'md5': 'd72f10cd39eac4215048f62ab477a511',
            'info_dict': {
                'id': '521be8ef82b16',
                'ext': 'mp4',
                'upload_date': '20131002',
                'title': 'The Mummy’s Hand (1940)',
            },
        }
    ]

    def _real_extract(self, url):
        """Extract metadata and formats for a single cinemassacre.com post.

        Args:
            url: Post URL; expected to match ``_VALID_URL``.

        Returns:
            A dict with the keys ``id``, ``title``, ``formats``,
            ``description``, ``upload_date`` and ``thumbnail``.

        Raises:
            ExtractorError: If the post page contains no recognisable
                screenwavemedia player embed.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, display_id)
        # The upload date is taken from the URL path (YYYY/MM/DD), not from
        # the page content.
        video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
        # The video id is the player's "id" query value, with an optional
        # "Cinemassacre-" prefix stripped.
        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
        if not mobj:
            raise ExtractorError('Can\'t extract embed url and video id')
        playerdata_url = mobj.group('embed_url')
        video_id = mobj.group('video_id')

        video_title = self._html_search_regex(
            r'<title>(?P<title>.+?)\|', webpage, 'title')
        # The description is optional (fatal=False).
        video_description = self._html_search_regex(
            r'<div class="entry-content">(?P<description>.+?)</div>',
            webpage, 'description', flags=re.DOTALL, fatal=False)

        playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage')
        video_thumbnail = self._search_regex(
            r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False)
        sd_url = self._search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file')
        videolist_url = self._search_regex(r'file: \'([^\']+\.smil)\'}', playerdata, 'videolist_url')

        videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')

        formats = []
        # Every file named in the playlist is resolved against the directory
        # of the SD file URL.
        baseurl = sd_url[:sd_url.rfind('/') + 1]
        for video in videolist.findall('.//video'):
            src = video.get('src')
            if not src:
                continue
            # Drop an optional "<prefix>:" in front of the file name.  Unlike
            # partition(':')[-1], split() returns src unchanged when there is
            # no colon, so the resulting URL never collapses to the bare
            # base directory.
            file_ = src.split(':', 1)[-1]
            width = int_or_none(video.get('width'))
            height = int_or_none(video.get('height'))
            bitrate = int_or_none(video.get('system-bitrate'))
            fmt = {
                'url': baseurl + file_,
                # Text between the last '_' and the file extension.
                'format_id': src.rpartition('.')[0].rpartition('_')[-1],
            }
            if width or height:
                fmt.update({
                    'tbr': bitrate // 1000 if bitrate else None,
                    'width': width,
                    'height': height,
                })
            else:
                # Entries without any dimensions are treated as audio-only.
                fmt.update({
                    'abr': bitrate // 1000 if bitrate else None,
                    'vcodec': 'none',
                })
            formats.append(fmt)
        # NOTE(review): _sort_formats is defined outside this file;
        # presumably it orders the list in place by quality -- confirm.
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'formats': formats,
            'description': video_description,
            'upload_date': video_date,
            'thumbnail': video_thumbnail,
        }