]>
Commit | Line | Data |
---|---|---|
84353056 | 1 | import json |
e897bd82 | 2 | import re |
add96eb9 | 3 | import urllib.parse |
44586389 PH |
4 | |
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
9572013d | 7 | int_or_none, |
494172d2 RA |
8 | parse_duration, |
9 | unified_strdate, | |
44586389 PH |
10 | ) |
11 | ||
12 | ||
class AppleTrailersIE(InfoExtractor):
    # Extractor for a single movie page on trailers.apple.com. The URL carries
    # a company slug and a movie slug; the result is a playlist with one entry
    # per clip found for that movie.
    IE_NAME = 'appletrailers'
    _VALID_URL = r'https?://(?:www\.|movie)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
    _TESTS = [{
        'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
        'info_dict': {
            'id': '5111',
            'title': 'Man of Steel',
        },
        'playlist': [
            {
                'md5': 'd97a8e575432dbcb81b7c3acb741f8a8',
                'info_dict': {
                    'id': 'manofsteel-trailer4',
                    'ext': 'mov',
                    'duration': 111,
                    'title': 'Trailer 4',
                    'upload_date': '20130523',
                    'uploader_id': 'wb',
                },
            },
            {
                'md5': 'b8017b7131b721fb4e8d6f49e1df908c',
                'info_dict': {
                    'id': 'manofsteel-trailer3',
                    'ext': 'mov',
                    'duration': 182,
                    'title': 'Trailer 3',
                    'upload_date': '20130417',
                    'uploader_id': 'wb',
                },
            },
            {
                'md5': 'd0f1e1150989b9924679b441f3404d48',
                'info_dict': {
                    'id': 'manofsteel-trailer',
                    'ext': 'mov',
                    'duration': 148,
                    'title': 'Trailer',
                    'upload_date': '20121212',
                    'uploader_id': 'wb',
                },
            },
            {
                'md5': '5fe08795b943eb2e757fa95cb6def1cb',
                'info_dict': {
                    'id': 'manofsteel-teaser',
                    'ext': 'mov',
                    'duration': 93,
                    'title': 'Teaser',
                    'upload_date': '20120721',
                    'uploader_id': 'wb',
                },
            },
        ],
    }, {
        'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
        'info_dict': {
            'id': '4489',
            'title': 'Blackthorn',
        },
        'playlist_mincount': 2,
        'expected_warnings': ['Unable to download JSON metadata'],
    }, {
        # json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json
        'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/',
        'info_dict': {
            'id': '15881',
            'title': 'Kung Fu Panda 3',
        },
        'playlist_mincount': 4,
    }, {
        'url': 'http://trailers.apple.com/ca/metropole/autrui/',
        'only_matching': True,
    }, {
        'url': 'http://movietrailers.apple.com/trailers/focus_features/kuboandthetwostrings/',
        'only_matching': True,
    }]

    # Captures the argument text of an ``iTunes.playURL(...);`` call; used both
    # to repair the playlist markup and to pull the JSON out of onClick values.
    _JSON_RE = r'iTunes.playURL\((.*?)\);'

    def _real_extract(self, url):
        """Return a playlist of the clips published for the movie at *url*.

        The movie and company slugs are read from the URL, and the movie page
        is downloaded to find its numeric ``FilmId``. The JSON feed for that
        id is then requested non-fatally. When feed data is available, the
        entries are built from its ``clips`` list. Otherwise the
        ``includes/playlists/itunes.inc`` fragment next to the page is
        downloaded, repaired into parseable XML, and each ``<li>`` becomes an
        entry whose formats come from a per-trailer settings JSON.
        """
        mobj = self._match_valid_url(url)
        movie = mobj.group('movie')
        uploader_id = mobj.group('company')

        webpage = self._download_webpage(url, movie)
        film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
        # fatal=False: a missing feed must not abort extraction, the legacy
        # playlist path below is used instead (presumably the helper returns a
        # falsy value on failure -- confirm against InfoExtractor).
        film_data = self._download_json(
            f'http://trailers.apple.com/trailers/feeds/data/{film_id}.json',
            film_id, fatal=False)

        if film_data:
            entries = []
            for clip in film_data.get('clips', []):
                clip_title = clip['title']

                formats = []
                for version, version_data in clip.get('versions', {}).items():
                    for size, size_data in version_data.get('sizes', {}).items():
                        src = size_data.get('src')
                        if not src:
                            continue
                        formats.append({
                            'format_id': f'{version}-{size}',
                            # Insert an "h" before the resolution suffix,
                            # e.g. "_720p.mov" -> "_h720p.mov".
                            'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
                            'width': int_or_none(size_data.get('width')),
                            'height': int_or_none(size_data.get('height')),
                            # First two characters of the version key are
                            # used as the language code.
                            'language': version[:2],
                        })

                entries.append({
                    # Movie slug plus the clip title reduced to lowercase
                    # ASCII letters and digits.
                    'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(),
                    'formats': formats,
                    'title': clip_title,
                    'thumbnail': clip.get('screen') or clip.get('thumb'),
                    'duration': parse_duration(clip.get('runtime') or clip.get('faded')),
                    'upload_date': unified_strdate(clip.get('posted')),
                    'uploader_id': uploader_id,
                })

            page_data = film_data.get('page', {})
            return self.playlist_result(entries, film_id, page_data.get('movie_title'))

        # Legacy path: no JSON feed, fall back to the HTML playlist fragment.
        playlist_url = urllib.parse.urljoin(url, 'includes/playlists/itunes.inc')

        def fix_html(s):
            """Turn the playlist fragment *s* into text an XML parser accepts."""
            # Drop <script> elements and make every <img> self-closing.
            s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
            s = re.sub(r'<img ([^<]*?)/?>', r'<img \1/>', s)
            # The ' in the onClick attributes are not escaped, it couldn't be parsed
            # like: http://trailers.apple.com/trailers/wb/gravity/

            def _clean_json(m):
                # Replace raw apostrophes inside the playURL(...) argument with
                # the numeric character reference so the attribute value stays
                # well-formed for the XML parser.
                return 'iTunes.playURL({});'.format(m.group(1).replace('\'', '&#39;'))
            s = re.sub(self._JSON_RE, _clean_json, s)
            # Wrap in a single root element.
            return f'<html>{s}</html>'
        doc = self._download_xml(playlist_url, movie, transform_source=fix_html)

        playlist = []
        for li in doc.findall('./div/ul/li'):
            on_click = li.find('.//a').attrib['onClick']
            trailer_info_json = self._search_regex(
                self._JSON_RE, on_click, 'trailer info')
            trailer_info = json.loads(trailer_info_json)
            first_url = trailer_info.get('url')
            if not first_url:
                # Items without a URL cannot be resolved to a trailer; skip.
                continue
            title = trailer_info['title']
            video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
            thumbnail = li.find('.//img').attrib['src']
            upload_date = trailer_info['posted'].replace('-', '')

            # Runtime is matched as "<minutes>:<seconds>"; the duration stays
            # None when the text does not contain that pattern.
            runtime = trailer_info['runtime']
            m = re.search(r'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime)
            duration = None
            if m:
                duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))

            # Trailer id: last path component of the URL, up to its final
            # underscore, lowercased.
            trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
            settings_json_url = urllib.parse.urljoin(url, f'includes/settings/{trailer_id}.json')
            settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')

            formats = []
            for fmt in settings['metadata']['sizes']:
                # The src is a file pointing to the real video file
                format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', fmt['src'])
                formats.append({
                    'url': format_url,
                    'format': fmt['type'],
                    'width': int_or_none(fmt['width']),
                    'height': int_or_none(fmt['height']),
                })

            playlist.append({
                '_type': 'video',
                'id': video_id,
                'formats': formats,
                'title': title,
                'duration': duration,
                'thumbnail': thumbnail,
                'upload_date': upload_date,
                'uploader_id': uploader_id,
                'http_headers': {
                    'User-Agent': 'QuickTime compatible (yt-dlp)',
                },
            })

        return {
            '_type': 'playlist',
            'id': movie,
            'entries': playlist,
        }
60427f63 | 205 | |
206 | ||
class AppleTrailersSectionIE(InfoExtractor):
    # Extractor for the "#section=<name>" listings on trailers.apple.com.
    # Each section maps to a JSON feed; every feed item becomes a URL entry.
    IE_NAME = 'appletrailers:section'
    # Section key (as it appears in the URL fragment) -> feed file name and
    # the playlist title to report.
    _SECTIONS = {
        'justadded': {'feed_path': 'just_added', 'title': 'Just Added'},
        'exclusive': {'feed_path': 'exclusive', 'title': 'Exclusive'},
        'justhd': {'feed_path': 'just_hd', 'title': 'Just HD'},
        'mostpopular': {'feed_path': 'most_pop', 'title': 'Most Popular'},
        'moviestudios': {'feed_path': 'studios', 'title': 'Movie Studios'},
    }
    # The alternation is generated from the section keys, in declaration order.
    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>{})'.format('|'.join(_SECTIONS))
    _TESTS = [{
        'url': 'http://trailers.apple.com/#section=justadded',
        'info_dict': {
            'title': 'Just Added',
            'id': 'justadded',
        },
        'playlist_mincount': 80,
    }, {
        'url': 'http://trailers.apple.com/#section=exclusive',
        'info_dict': {
            'title': 'Exclusive',
            'id': 'exclusive',
        },
        'playlist_mincount': 80,
    }, {
        'url': 'http://trailers.apple.com/#section=justhd',
        'info_dict': {
            'title': 'Just HD',
            'id': 'justhd',
        },
        'playlist_mincount': 80,
    }, {
        'url': 'http://trailers.apple.com/#section=mostpopular',
        'info_dict': {
            'title': 'Most Popular',
            'id': 'mostpopular',
        },
        'playlist_mincount': 30,
    }, {
        'url': 'http://trailers.apple.com/#section=moviestudios',
        'info_dict': {
            'title': 'Movie Studios',
            'id': 'moviestudios',
        },
        'playlist_mincount': 80,
    }]

    def _real_extract(self, url):
        """Return a playlist of URL entries for the section named in *url*.

        The section key is taken from the URL, its feed is downloaded as
        JSON, and each feed item's ``location`` is appended to the site root
        to form an entry URL.
        """
        section_key = self._match_id(url)
        section_info = self._SECTIONS[section_key]

        feed_url = 'http://trailers.apple.com/trailers/home/feeds/{}.json'.format(section_info['feed_path'])
        feed_items = self._download_json(feed_url, section_key)

        entries = []
        for item in feed_items:
            entries.append(self.url_result('http://trailers.apple.com' + item['location']))

        return self.playlist_result(entries, section_key, section_info['title'])