]>
Commit | Line | Data |
---|---|---|
44586389 PH |
1 | import re |
2 | import xml.etree.ElementTree | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
6 | determine_ext, | |
44586389 PH |
7 | ) |
8 | ||
9 | ||
class AppleTrailersIE(InfoExtractor):
    """Extractor for movie pages on trailers.apple.com.

    A movie page is turned into a playlist that holds one video entry per
    trailer listed in the page's ``includes/playlists/web.inc`` snippet.
    """

    # The dots of the host name are escaped so that only the literal domain
    # matches (an unescaped "." would accept any character in that position).
    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
    _TEST = {
        u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
        u"playlist": [
            {
                u"file": u"manofsteel-trailer4.mov",
                u"md5": u"11874af099d480cc09e103b189805d5f",
                u"info_dict": {
                    u"duration": 111,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_11624.jpg",
                    u"title": u"Trailer 4",
                    u"upload_date": u"20130523",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-trailer3.mov",
                u"md5": u"07a0a262aae5afe68120eed61137ab34",
                u"info_dict": {
                    u"duration": 182,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_10793.jpg",
                    u"title": u"Trailer 3",
                    u"upload_date": u"20130417",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-trailer.mov",
                u"md5": u"e401fde0813008e3307e54b6f384cff1",
                u"info_dict": {
                    u"duration": 148,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_8703.jpg",
                    u"title": u"Trailer",
                    u"upload_date": u"20121212",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-teaser.mov",
                u"md5": u"76b392f2ae9e7c98b22913c10a639c97",
                u"info_dict": {
                    u"duration": 93,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_6899.jpg",
                    u"title": u"Teaser",
                    u"upload_date": u"20120721",
                    u"uploader_id": u"wb",
                },
            }
        ]
    }

    def _real_extract(self, url):
        """Build a playlist with one entry per trailer of the movie at *url*.

        The playlist snippet is downloaded and parsed as XML.  For every
        trailer, the size pages and detail pages it links to are fetched to
        collect the available formats.

        Returns a dict with ``_type`` 'playlist', the movie name as ``id``
        and the list of video info dicts as ``entries``.
        """
        mobj = re.match(self._VALID_URL, url)
        movie = mobj.group('movie')
        uploader_id = mobj.group('company')

        # Every follow-up request is built from the page URL without its
        # query string; appending paths after a "?" would corrupt them.
        base_url = url.partition(u'?')[0]

        playlist_url = base_url + u'/includes/playlists/web.inc'
        playlist_snippet = self._download_webpage(playlist_url, movie)
        # Script blocks are dropped and the snippet is wrapped in a single
        # root element before it is handed to the XML parser.
        playlist_cleaned = re.sub(r'(?s)<script>.*?</script>', u'', playlist_snippet)
        playlist_html = u'<html>' + playlist_cleaned + u'</html>'

        # Parsed size pages, keyed by size id, so each is fetched only once.
        size_cache = {}

        doc = xml.etree.ElementTree.fromstring(playlist_html)
        playlist = []
        for li in doc.findall('./div/ul/li'):
            title = li.find('.//h3').text
            video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
            thumbnail = li.find('.//img').attrib['src']

            # Upload date and runtime are optional metadata: when the markup
            # is missing they stay None instead of aborting the extraction.
            upload_date = None
            duration = None
            date_el = li.find('.//p')
            if date_el is not None:
                m = re.search(
                    r':\s?(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<year>[0-9]{2})',
                    date_el.text or u'')
                if m:
                    # The page gives a two-digit year; "20" is prepended.
                    upload_date = u'20' + m.group('year') + m.group('month') + m.group('day')
                runtime_el = date_el.find('./br')
                if runtime_el is not None:
                    # The runtime is read from the text following the <br>.
                    m = re.search(
                        r':\s?(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})',
                        runtime_el.tail or u'')
                    if m:
                        duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))

            formats = []
            for formats_el in li.findall('.//a'):
                # .get() so that anchors without a class attribute are
                # skipped rather than raising KeyError.
                if formats_el.get('class') != 'OverlayPanel':
                    continue
                target = formats_el.attrib['target']

                format_code = formats_el.text or u''
                if 'Automatic' in format_code:
                    continue

                size_q = formats_el.attrib['href']
                size_id = size_q.rpartition('#videos-')[2]
                if size_id not in size_cache:
                    size_url = base_url + size_q
                    sizepage_html = self._download_webpage(
                        size_url, movie,
                        note=u'Downloading size info %s' % size_id,
                        errnote=u'Error while downloading size info %s' % size_id,
                    )
                    size_cache[size_id] = xml.etree.ElementTree.fromstring(sizepage_html)

                sizepage_doc = size_cache[size_id]
                links = sizepage_doc.findall('.//{http://www.w3.org/1999/xhtml}ul/{http://www.w3.org/1999/xhtml}li/{http://www.w3.org/1999/xhtml}a')
                for vid_a in links:
                    href = vid_a.get('href')
                    # Only links ending in this format's target are relevant.
                    if not href or not href.endswith(target):
                        continue
                    detail_q = href.partition('#')[0]
                    detail_url = base_url + '/' + detail_q

                    # detail_id is only used in progress messages, so fall
                    # back to the raw path when it has an unexpected shape.
                    m = re.match(r'includes/(?P<detail_id>[^/]+)/', detail_q)
                    detail_id = m.group('detail_id') if m else detail_q

                    detail_html = self._download_webpage(
                        detail_url, movie,
                        note=u'Downloading detail %s %s' % (detail_id, size_id),
                        errnote=u'Error while downloading detail %s %s' % (detail_id, size_id)
                    )
                    detail_doc = xml.etree.ElementTree.fromstring(detail_html)
                    movie_link_el = detail_doc.find('.//{http://www.w3.org/1999/xhtml}a')
                    # NOTE(review): these sanity checks are asserts and are
                    # therefore skipped under ``python -O``; consider raising
                    # the project's extractor error type instead.
                    assert movie_link_el.get('class') == 'movieLink', \
                        'first link of the detail page is not the movie link'
                    movie_link = movie_link_el.get('href').partition('?')[0].replace('_', '_h')
                    ext = determine_ext(movie_link)
                    assert ext == 'mov', 'unexpected extension %r' % ext

                    formats.append({
                        'format': format_code,
                        'ext': ext,
                        'url': movie_link,
                    })

            info = {
                '_type': 'video',
                'id': video_id,
                'title': title,
                'formats': formats,
                'duration': duration,
                'thumbnail': thumbnail,
                'upload_date': upload_date,
                'uploader_id': uploader_id,
                'user_agent': 'QuickTime compatible (youtube-dl)',
            }
            # TODO: Remove when #980 has been merged
            # Until then the last collected format is exposed as the
            # entry's own url/ext.
            info['url'] = formats[-1]['url']
            info['ext'] = formats[-1]['ext']

            playlist.append(info)

        return {
            '_type': 'playlist',
            'id': movie,
            'entries': playlist,
        }