]>
Commit | Line | Data |
---|---|---|
3798eadc PH |
1 | from __future__ import unicode_literals |
2 | ||
44586389 | 3 | import re |
84353056 | 4 | import json |
44586389 PH |
5 | |
6 | from .common import InfoExtractor | |
1cc79574 | 7 | from ..compat import compat_urlparse |
44586389 | 8 | from ..utils import ( |
9572013d | 9 | int_or_none, |
44586389 PH |
10 | ) |
11 | ||
12 | ||
class AppleTrailersIE(InfoExtractor):
    """Extractor for a single movie page on trailers.apple.com.

    The movie page is turned into a playlist with one video entry per
    trailer listed in the page's ``includes/playlists/itunes.inc`` fragment.
    """

    IE_NAME = 'appletrailers'
    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
    _TESTS = [{
        'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
        'info_dict': {
            'id': 'manofsteel',
        },
        'playlist': [
            {
                'md5': 'd97a8e575432dbcb81b7c3acb741f8a8',
                'info_dict': {
                    'id': 'manofsteel-trailer4',
                    'ext': 'mov',
                    'duration': 111,
                    'title': 'Trailer 4',
                    'upload_date': '20130523',
                    'uploader_id': 'wb',
                },
            },
            {
                'md5': 'b8017b7131b721fb4e8d6f49e1df908c',
                'info_dict': {
                    'id': 'manofsteel-trailer3',
                    'ext': 'mov',
                    'duration': 182,
                    'title': 'Trailer 3',
                    'upload_date': '20130417',
                    'uploader_id': 'wb',
                },
            },
            {
                'md5': 'd0f1e1150989b9924679b441f3404d48',
                'info_dict': {
                    'id': 'manofsteel-trailer',
                    'ext': 'mov',
                    'duration': 148,
                    'title': 'Trailer',
                    'upload_date': '20121212',
                    'uploader_id': 'wb',
                },
            },
            {
                'md5': '5fe08795b943eb2e757fa95cb6def1cb',
                'info_dict': {
                    'id': 'manofsteel-teaser',
                    'ext': 'mov',
                    'duration': 93,
                    'title': 'Teaser',
                    'upload_date': '20120721',
                    'uploader_id': 'wb',
                },
            },
        ]
    }, {
        'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
        'info_dict': {
            'id': 'blackthorn',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'http://trailers.apple.com/ca/metropole/autrui/',
        'only_matching': True,
    }]

    # Captures the JSON argument of the iTunes.playURL(...) call found in
    # each trailer link's onClick attribute.
    _JSON_RE = r'iTunes.playURL\((.*?)\);'

    def _real_extract(self, url):
        """Build a playlist of all trailers listed for the movie at *url*.

        Returns a playlist info dict whose ``id`` is the movie slug from the
        URL and whose ``entries`` are video info dicts, one per trailer that
        has a ``url`` in its playURL JSON.
        """
        mobj = re.match(self._VALID_URL, url)
        movie = mobj.group('movie')
        uploader_id = mobj.group('company')

        playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')

        def fix_html(s):
            """Massage the playlist HTML fragment until it parses as XML."""
            # Drop <script> elements entirely.
            s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
            # Make every <img> tag self-closing.
            s = re.sub(r'<img ([^<]*?)/?>', r'<img \1/>', s)
            # The ' in the onClick attributes are not escaped, so it could not
            # be parsed, e.g.: http://trailers.apple.com/trailers/wb/gravity/

            def _clean_json(m):
                # Replace raw apostrophes inside the playURL argument with a
                # numeric character reference so they no longer terminate the
                # surrounding attribute; the XML parser decodes them back.
                return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
            s = re.sub(self._JSON_RE, _clean_json, s)
            # Wrap the fragment in a single root element.
            s = '<html>%s</html>' % s
            return s
        doc = self._download_xml(playlist_url, movie, transform_source=fix_html)

        playlist = []
        for li in doc.findall('./div/ul/li'):
            on_click = li.find('.//a').attrib['onClick']
            trailer_info_json = self._search_regex(
                self._JSON_RE, on_click, 'trailer info')
            trailer_info = json.loads(trailer_info_json)
            first_url = trailer_info.get('url')
            if not first_url:
                # Entries without a URL cannot be resolved to a settings file.
                continue
            title = trailer_info['title']
            video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
            thumbnail = li.find('.//img').attrib['src']
            upload_date = trailer_info['posted'].replace('-', '')

            # Runtime is matched as "<minutes>:<seconds>"; anything else
            # leaves the duration unknown.
            runtime = trailer_info['runtime']
            m = re.search(r'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime)
            duration = None
            if m:
                duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))

            # The settings file is named after the URL's last path component,
            # up to its final underscore, lowercased.
            trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
            settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
            settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')

            formats = []
            for size in settings['metadata']['sizes']:
                # The src is a file pointing to the real video file
                format_url = re.sub(r'_(\d*p.mov)', r'_h\1', size['src'])
                formats.append({
                    'url': format_url,
                    'format': size['type'],
                    'width': int_or_none(size['width']),
                    'height': int_or_none(size['height']),
                })

            self._sort_formats(formats)

            playlist.append({
                '_type': 'video',
                'id': video_id,
                'formats': formats,
                'title': title,
                'duration': duration,
                'thumbnail': thumbnail,
                'upload_date': upload_date,
                'uploader_id': uploader_id,
                'http_headers': {
                    'User-Agent': 'QuickTime compatible (youtube-dl)',
                },
            })

        return {
            '_type': 'playlist',
            'id': movie,
            'entries': playlist,
        }
60427f63 | 156 | |
157 | ||
class AppleTrailersSectionIE(InfoExtractor):
    """Extractor for the ``#section=...`` listings on trailers.apple.com.

    Each supported section is backed by a JSON feed; every feed item becomes
    a URL result pointing at the item's ``location`` on trailers.apple.com.
    """

    IE_NAME = 'appletrailers:section'
    # Section id (as used in the URL fragment) -> feed file name and title.
    _SECTIONS = {
        'justadded': {
            'feed_path': 'just_added',
            'title': 'Just Added',
        },
        'exclusive': {
            'feed_path': 'exclusive',
            'title': 'Exclusive',
        },
        'justhd': {
            'feed_path': 'just_hd',
            'title': 'Just HD',
        },
        'mostpopular': {
            'feed_path': 'most_pop',
            'title': 'Most Popular',
        },
        'moviestudios': {
            'feed_path': 'studios',
            'title': 'Movie Studios',
        },
    }
    # Only the section ids declared in _SECTIONS are accepted.
    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS)
    _TESTS = [{
        'url': 'http://trailers.apple.com/#section=justadded',
        'info_dict': {
            'title': 'Just Added',
            'id': 'justadded',
        },
        'playlist_mincount': 80,
    }, {
        'url': 'http://trailers.apple.com/#section=exclusive',
        'info_dict': {
            'title': 'Exclusive',
            'id': 'exclusive',
        },
        'playlist_mincount': 80,
    }, {
        'url': 'http://trailers.apple.com/#section=justhd',
        'info_dict': {
            'title': 'Just HD',
            'id': 'justhd',
        },
        'playlist_mincount': 80,
    }, {
        'url': 'http://trailers.apple.com/#section=mostpopular',
        'info_dict': {
            'title': 'Most Popular',
            'id': 'mostpopular',
        },
        'playlist_mincount': 80,
    }, {
        'url': 'http://trailers.apple.com/#section=moviestudios',
        'info_dict': {
            'title': 'Movie Studios',
            'id': 'moviestudios',
        },
        'playlist_mincount': 80,
    }]

    def _real_extract(self, url):
        """Return a playlist of URL results for the section named in *url*."""
        section_id = self._match_id(url)
        section_info = self._SECTIONS[section_id]

        feed_url = (
            'http://trailers.apple.com/trailers/home/feeds/%s.json'
            % section_info['feed_path'])
        feed_items = self._download_json(feed_url, section_id)

        # Each feed item carries a site-relative 'location' for its movie page.
        entries = []
        for item in feed_items:
            movie_url = 'http://trailers.apple.com' + item['location']
            entries.append(self.url_result(movie_url))

        return self.playlist_result(entries, section_id, section_info['title'])