]>
Commit | Line | Data |
---|---|---|
3798eadc PH |
1 | from __future__ import unicode_literals |
2 | ||
44586389 | 3 | import re |
84353056 | 4 | import json |
44586389 PH |
5 | |
6 | from .common import InfoExtractor | |
1cc79574 | 7 | from ..compat import compat_urlparse |
44586389 | 8 | from ..utils import ( |
9572013d | 9 | int_or_none, |
44586389 PH |
10 | ) |
11 | ||
12 | ||
class AppleTrailersIE(InfoExtractor):
    """Extractor for movie pages on trailers.apple.com.

    A movie page is returned as a playlist with one entry per trailer
    listed in the page's ``includes/playlists/itunes.inc`` fragment.
    """

    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
    _TESTS = [{
        "url": "http://trailers.apple.com/trailers/wb/manofsteel/",
        'info_dict': {
            'id': 'manofsteel',
        },
        "playlist": [
            {
                "md5": "d97a8e575432dbcb81b7c3acb741f8a8",
                "info_dict": {
                    "id": "manofsteel-trailer4",
                    "ext": "mov",
                    "duration": 111,
                    "title": "Trailer 4",
                    "upload_date": "20130523",
                    "uploader_id": "wb",
                },
            },
            {
                "md5": "b8017b7131b721fb4e8d6f49e1df908c",
                "info_dict": {
                    "id": "manofsteel-trailer3",
                    "ext": "mov",
                    "duration": 182,
                    "title": "Trailer 3",
                    "upload_date": "20130417",
                    "uploader_id": "wb",
                },
            },
            {
                "md5": "d0f1e1150989b9924679b441f3404d48",
                "info_dict": {
                    "id": "manofsteel-trailer",
                    "ext": "mov",
                    "duration": 148,
                    "title": "Trailer",
                    "upload_date": "20121212",
                    "uploader_id": "wb",
                },
            },
            {
                "md5": "5fe08795b943eb2e757fa95cb6def1cb",
                "info_dict": {
                    "id": "manofsteel-teaser",
                    "ext": "mov",
                    "duration": 93,
                    "title": "Teaser",
                    "upload_date": "20120721",
                    "uploader_id": "wb",
                },
            },
        ]
    }, {
        'url': 'http://trailers.apple.com/ca/metropole/autrui/',
        'only_matching': True,
    }]

    # Captures the argument passed to an ``iTunes.playURL(...)`` call; the
    # code below feeds the captured text to ``json.loads``.
    _JSON_RE = r'iTunes.playURL\((.*?)\);'

    def _real_extract(self, url):
        """Build a playlist of all trailers listed for the movie at *url*.

        Downloads the movie's playlist fragment, then one settings JSON per
        trailer, and returns a ``'_type': 'playlist'`` dict whose entries
        carry id, title, formats, duration, thumbnail, upload date and
        uploader id.
        """
        mobj = re.match(self._VALID_URL, url)
        movie = mobj.group('movie')
        uploader_id = mobj.group('company')

        # Resolve relative resources against the matched movie directory
        # rather than the raw URL: _VALID_URL does not require a trailing
        # slash, and urljoin() would otherwise replace the last path segment
        # (the movie name) instead of appending to it.
        base_url = mobj.group(0) + '/'
        playlist_url = compat_urlparse.urljoin(
            base_url, 'includes/playlists/itunes.inc')

        def fix_html(s):
            """Massage the playlist fragment so it can be parsed as XML."""
            # Drop <script> elements entirely.
            s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
            # Self-close <img> tags.
            s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)

            # The single quotes inside the onClick attributes are not
            # escaped, so the document cannot be parsed as-is, e.g.
            # http://trailers.apple.com/trailers/wb/gravity/
            def _clean_json(m):
                return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
            s = re.sub(self._JSON_RE, _clean_json, s)
            # Wrap everything in a single root element.
            s = '<html>%s</html>' % s
            return s
        doc = self._download_xml(playlist_url, movie, transform_source=fix_html)

        playlist = []
        for li in doc.findall('./div/ul/li'):
            on_click = li.find('.//a').attrib['onClick']
            trailer_info_json = self._search_regex(
                self._JSON_RE, on_click, 'trailer info')
            trailer_info = json.loads(trailer_info_json)
            title = trailer_info['title']
            # e.g. movie 'manofsteel' + title 'Trailer 4' -> 'manofsteel-trailer4'
            video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
            thumbnail = li.find('.//img').attrib['src']
            upload_date = trailer_info['posted'].replace('-', '')

            # 'runtime' is searched for a "minutes:seconds" pattern; the
            # duration stays None when no such pattern is found.
            runtime = trailer_info['runtime']
            m = re.search(r'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime)
            duration = None
            if m:
                duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))

            # The settings file name is derived from the last path component
            # of the trailer URL, cut at its final underscore and lowercased.
            first_url = trailer_info['url']
            trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
            settings_json_url = compat_urlparse.urljoin(
                base_url, 'includes/settings/%s.json' % trailer_id)
            settings = self._download_json(
                settings_json_url, trailer_id, 'Downloading settings json')

            formats = []
            for size in settings['metadata']['sizes']:
                # The src is a file pointing to the real video file
                format_url = re.sub(r'_(\d*p.mov)', r'_h\1', size['src'])
                formats.append({
                    'url': format_url,
                    'format': size['type'],
                    'width': int_or_none(size['width']),
                    'height': int_or_none(size['height']),
                })

            self._sort_formats(formats)

            playlist.append({
                '_type': 'video',
                'id': video_id,
                'formats': formats,
                'title': title,
                'duration': duration,
                'thumbnail': thumbnail,
                'upload_date': upload_date,
                'uploader_id': uploader_id,
                'http_headers': {
                    'User-Agent': 'QuickTime compatible (youtube-dl)',
                },
            })

        return {
            '_type': 'playlist',
            'id': movie,
            'entries': playlist,
        }