]>
Commit | Line | Data |
---|---|---|
44586389 | 1 | import re |
84353056 | 2 | import json |
44586389 PH |
3 | |
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
84353056 | 6 | compat_urlparse, |
44586389 | 7 | determine_ext, |
44586389 PH |
8 | ) |
9 | ||
10 | ||
class AppleTrailersIE(InfoExtractor):
    """Extractor for movie pages on trailers.apple.com.

    A movie page lists several trailers; ``_real_extract`` returns them as a
    playlist with one video entry per trailer.
    """

    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
    _TEST = {
        u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
        u"playlist": [
            {
                u"file": u"manofsteel-trailer4.mov",
                u"md5": u"d97a8e575432dbcb81b7c3acb741f8a8",
                u"info_dict": {
                    u"duration": 111,
                    u"title": u"Trailer 4",
                    u"upload_date": u"20130523",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-trailer3.mov",
                u"md5": u"b8017b7131b721fb4e8d6f49e1df908c",
                u"info_dict": {
                    u"duration": 182,
                    u"title": u"Trailer 3",
                    u"upload_date": u"20130417",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-trailer.mov",
                u"md5": u"d0f1e1150989b9924679b441f3404d48",
                u"info_dict": {
                    u"duration": 148,
                    u"title": u"Trailer",
                    u"upload_date": u"20121212",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-teaser.mov",
                u"md5": u"5fe08795b943eb2e757fa95cb6def1cb",
                u"info_dict": {
                    u"duration": 93,
                    u"title": u"Teaser",
                    u"upload_date": u"20120721",
                    u"uploader_id": u"wb",
                },
            }
        ]
    }

    # Captures the JSON argument of the iTunes.playURL(...) call found in the
    # onClick attribute of each trailer link.
    _JSON_RE = r'iTunes.playURL\((.*?)\);'

    def _real_extract(self, url):
        """Build a playlist with one entry per trailer listed for the movie.

        The movie and company path components of ``url`` (as matched by
        ``_VALID_URL``) become the playlist id and each entry's uploader id.
        For every ``<li>`` of the playlist include, the trailer metadata is
        read from the JSON embedded in the link's onClick handler, and the
        available formats are read from a per-trailer settings JSON file.

        Returns a dict with ``_type`` 'playlist', the movie ``id`` and the
        list of video ``entries``.
        """
        mobj = re.match(self._VALID_URL, url)
        movie = mobj.group('movie')
        uploader_id = mobj.group('company')

        playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc')

        def fix_html(s):
            """Rewrite the playlist HTML fragment so it parses as XML."""
            # Drop <script> elements entirely and self-close <img> tags.
            s = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', s)
            s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)

            # The ' in the onClick attributes are not escaped, it couldn't be
            # parsed, like: http://trailers.apple.com/trailers/wb/gravity/
            # Replace them with the numeric character reference instead.
            def _clean_json(m):
                return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
            s = re.sub(self._JSON_RE, _clean_json, s)
            # Wrap the fragment in a single root element.
            return u'<html>' + s + u'</html>'

        doc = self._download_xml(playlist_url, movie, transform_source=fix_html)

        playlist = []
        for li in doc.findall('./div/ul/li'):
            on_click = li.find('.//a').attrib['onClick']
            trailer_info_json = self._search_regex(
                self._JSON_RE, on_click, u'trailer info')
            trailer_info = json.loads(trailer_info_json)
            title = trailer_info['title']
            # Id is "<movie>-<title>" with the title reduced to lowercase
            # ASCII letters and digits.
            video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
            thumbnail = li.find('.//img').attrib['src']
            # 'posted' has its dashes stripped to form the upload date.
            upload_date = trailer_info['posted'].replace('-', '')

            # 'runtime' is searched for "<minutes>:<seconds>"; the duration
            # stays None when it does not contain that pattern.
            runtime = trailer_info['runtime']
            m = re.search(r'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime)
            duration = None
            if m:
                duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))

            # The settings file is named after the last path component of the
            # trailer URL, cut at its final underscore and lowercased.
            first_url = trailer_info['url']
            trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
            settings_json_url = compat_urlparse.urljoin(
                url, 'includes/settings/%s.json' % trailer_id)
            settings_json = self._download_webpage(
                settings_json_url, trailer_id, u'Downloading settings json')
            settings = json.loads(settings_json)

            formats = []
            for size in settings['metadata']['sizes']:
                # The src is a file pointing to the real video file; the real
                # file name has an 'h' inserted before the resolution suffix.
                format_url = re.sub(r'_(\d*p.mov)', r'_h\1', size['src'])
                formats.append({
                    'url': format_url,
                    'ext': determine_ext(format_url),
                    'format': size['type'],
                    # Coerce both dimensions so every format carries
                    # consistently typed values.
                    'width': int(size['width']),
                    'height': int(size['height']),
                })

            self._sort_formats(formats)

            playlist.append({
                '_type': 'video',
                'id': video_id,
                'title': title,
                'formats': formats,
                'duration': duration,
                'thumbnail': thumbnail,
                'upload_date': upload_date,
                'uploader_id': uploader_id,
                'user_agent': 'QuickTime compatible (youtube-dl)',
            })

        return {
            '_type': 'playlist',
            'id': movie,
            'entries': playlist,
        }