]>
Commit | Line | Data |
---|---|---|
1 | import functools | |
2 | import re | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..compat import compat_str | |
6 | from ..utils import ( | |
7 | float_or_none, | |
8 | int_or_none, | |
9 | ISO639Utils, | |
10 | join_nonempty, | |
11 | OnDemandPagedList, | |
12 | parse_duration, | |
13 | str_or_none, | |
14 | str_to_int, | |
15 | unified_strdate, | |
16 | ) | |
17 | ||
18 | ||
class AdobeTVBaseIE(InfoExtractor):
    """Shared helpers for the Adobe TV extractors: API access and metadata parsing."""

    def _call_api(self, path, video_id, query, note=None):
        """Request ``path`` from the Adobe TV v4 API and return its ``data`` member.

        ``query`` is sent as the request query and ``note`` is passed through
        to ``_download_json`` unchanged.
        """
        return self._download_json(
            'http://tv.adobe.com/api/v4/' + path,
            video_id, note, query=query)['data']

    def _parse_subtitles(self, video_data, url_key):
        """Collect VTT subtitle tracks from ``video_data['translations']``.

        ``url_key`` is the name of the field holding the track URL in each
        translation entry. Returns a dict mapping a language code to a list
        of ``{'ext': 'vtt', 'url': ...}`` dicts. Entries without a URL or
        without a resolvable language code are skipped.
        """
        subtitles = {}
        # `or []` also covers an explicit null, which `.get(key, [])` does not.
        for translation in video_data.get('translations') or []:
            vtt_path = translation.get(url_key)
            if not vtt_path:
                continue
            lang = translation.get('language_w3c')
            if not lang:
                # Fall back to converting the longer language code, if present.
                language_medium = translation.get('language_medium')
                if language_medium:
                    lang = ISO639Utils.long2short(language_medium)
            if not lang:
                # Do not file a track under a None key.
                continue
            subtitles.setdefault(lang, []).append({
                'ext': 'vtt',
                'url': vtt_path,
            })
        return subtitles

    def _parse_video_data(self, video_data):
        """Convert one episode record from the API into an info dict.

        ``id`` and ``title`` are required (``KeyError`` when absent); all
        other fields are read with ``.get``.
        """
        video_id = compat_str(video_data['id'])
        title = video_data['title']

        # Only the first s3:// original file is exposed as the 'original' format.
        s3_extracted = False
        formats = []
        # `or []` tolerates a null `videos` member as well as a missing one.
        for source in video_data.get('videos') or []:
            source_url = source.get('url')
            if not source_url:
                continue
            f = {
                'format_id': source.get('quality_level'),
                'fps': int_or_none(source.get('frame_rate')),
                'height': int_or_none(source.get('height')),
                'tbr': int_or_none(source.get('video_data_rate')),
                'width': int_or_none(source.get('width')),
                'url': source_url,
            }
            original_filename = source.get('original_filename')
            if original_filename:
                if not (f.get('height') and f.get('width')):
                    # Recover the dimensions from a `_<width>x<height>` hint
                    # in the file name when the API did not supply both.
                    mobj = re.search(r'_(\d+)x(\d+)', original_filename)
                    if mobj:
                        f.update({
                            'height': int(mobj.group(2)),
                            'width': int(mobj.group(1)),
                        })
                if original_filename.startswith('s3://') and not s3_extracted:
                    formats.append({
                        'format_id': 'original',
                        'quality': 1,
                        'url': original_filename.replace('s3://', 'https://s3.amazonaws.com/'),
                    })
                    s3_extracted = True
            formats.append(f)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': video_data.get('description'),
            'thumbnail': video_data.get('thumbnail'),
            'upload_date': unified_strdate(video_data.get('start_date')),
            'duration': parse_duration(video_data.get('duration')),
            'view_count': str_to_int(video_data.get('playcount')),
            'formats': formats,
            'subtitles': self._parse_subtitles(video_data, 'vtt'),
        }
86 | ||
87 | ||
class AdobeTVEmbedIE(AdobeTVBaseIE):
    """Extractor for ``tv.adobe.com/embed/<number>/<id>`` URLs."""

    IE_NAME = 'adobetv:embed'
    _VALID_URL = r'https?://tv\.adobe\.com/embed/\d+/(?P<id>\d+)'
    _TEST = {
        'url': 'https://tv.adobe.com/embed/22/4153',
        'md5': 'c8c0461bf04d54574fc2b4d07ac6783a',
        'info_dict': {
            'id': '4153',
            'ext': 'flv',
            'title': 'Creating Graphics Optimized for BlackBerry',
            'description': 'md5:eac6e8dced38bdaae51cd94447927459',
            'thumbnail': r're:https?://.*\.jpg$',
            'upload_date': '20091109',
            'duration': 377,
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Fetch the episode by the numeric id in the URL and parse the first result."""
        episode_id = self._match_id(url)
        api_path = 'episode/' + episode_id
        episodes = self._call_api(
            api_path, episode_id, {'disclosure': 'standard'})
        return self._parse_video_data(episodes[0])
112 | ||
113 | ||
class AdobeTVIE(AdobeTVBaseIE):
    """Extractor for ``tv.adobe.com/[<lang>/]watch/<show>/<episode>`` URLs."""

    IE_NAME = 'adobetv'
    _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
        'md5': '9bc5727bcdd55251f35ad311ca74fa1e',
        'info_dict': {
            'id': '10981',
            'ext': 'mp4',
            'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
            'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
            'thumbnail': r're:https?://.*\.jpg$',
            'upload_date': '20110914',
            'duration': 60,
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Look the episode up by show and episode URL names and parse the first result."""
        lang_code, show_slug, episode_slug = self._match_valid_url(url).groups()
        api_query = {
            'disclosure': 'standard',
            # URLs without a language prefix are queried as English.
            'language': lang_code or 'en',
            'show_urlname': show_slug,
            'urlname': episode_slug,
        }
        episodes = self._call_api('episode/get', episode_slug, api_query)
        return self._parse_video_data(episodes[0])
146 | ||
147 | ||
class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
    """Base class for paginated Adobe TV listings.

    Subclasses are expected to provide ``_RESOURCE`` (the API path that is
    requested page by page) and ``_process_data`` (called with each element
    of a page to produce a playlist entry).
    """
    _PAGE_SIZE = 25

    def _fetch_page(self, display_id, query, page):
        """Yield the processed entries of one listing page.

        ``page`` is incremented by one before being sent to the API. The
        caller's ``query`` dict is left untouched: a per-page copy carrying
        the ``page`` parameter is built instead, so a query shared with other
        API calls is never modified behind the caller's back.
        """
        page += 1
        page_query = dict(query, page=page)
        for element_data in self._call_api(
                self._RESOURCE, display_id, page_query, 'Download Page %d' % page):
            yield self._process_data(element_data)

    def _extract_playlist_entries(self, display_id, query):
        """Return an ``OnDemandPagedList`` that fetches pages via ``_fetch_page``."""
        return OnDemandPagedList(functools.partial(
            self._fetch_page, display_id, query), self._PAGE_SIZE)
161 | ||
162 | ||
class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
    """Playlist extractor for ``tv.adobe.com/[<lang>/]show/<show>`` URLs."""

    IE_NAME = 'adobetv:show'
    _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://tv.adobe.com/show/the-complete-picture-with-julieanne-kost',
        'info_dict': {
            'id': '36',
            'title': 'The Complete Picture with Julieanne Kost',
            'description': 'md5:fa50867102dcd1aa0ddf2ab039311b27',
        },
        'playlist_mincount': 136,
    }
    # Pages of the 'episode' resource are parsed with the shared episode parser.
    _RESOURCE = 'episode'
    _process_data = AdobeTVBaseIE._parse_video_data

    def _real_extract(self, url):
        """Fetch the show record, then return a playlist of its episodes."""
        lang_code, show_slug = self._match_valid_url(url).groups()
        api_query = {
            'disclosure': 'standard',
            # URLs without a language prefix are queried as English.
            'language': lang_code or 'en',
            'show_urlname': show_slug,
        }

        show = self._call_api('show/get', show_slug, api_query)[0]
        entries = self._extract_playlist_entries(show_slug, api_query)

        return self.playlist_result(
            entries,
            str_or_none(show.get('id')),
            show.get('show_name'),
            show.get('show_description'))
197 | ||
198 | ||
class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
    """Playlist extractor for ``tv.adobe.com/[<lang>/]channel/<channel>[/<category>]`` URLs."""

    IE_NAME = 'adobetv:channel'
    _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))?'

    _TEST = {
        'url': 'http://tv.adobe.com/channel/development',
        'info_dict': {
            'id': 'development',
        },
        'playlist_mincount': 96,
    }
    _RESOURCE = 'show'

    def _process_data(self, show_data):
        """Turn one show record into a URL result handled by ``AdobeTVShow``."""
        show_url = show_data['url']
        show_id = str_or_none(show_data.get('id'))
        return self.url_result(show_url, 'AdobeTVShow', show_id)

    def _real_extract(self, url):
        """Return a playlist of the shows in a channel, optionally limited to a category."""
        lang_code, channel_slug, category_slug = self._match_valid_url(url).groups()
        api_query = {
            'channel_urlname': channel_slug,
            # URLs without a language prefix are queried as English.
            'language': lang_code or 'en',
        }
        # The category segment of the URL is optional.
        if category_slug:
            api_query['category_urlname'] = category_slug

        entries = self._extract_playlist_entries(channel_slug, api_query)
        return self.playlist_result(entries, channel_slug)
230 | ||
231 | ||
class AdobeTVVideoIE(AdobeTVBaseIE):
    """Extractor for ``video.tv.adobe.com/v/<id>`` player pages."""

    IE_NAME = 'adobetv:video'
    _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
    _EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]']

    _TEST = {
        # From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
        'url': 'https://video.tv.adobe.com/v/2456/',
        'md5': '43662b577c018ad707a63766462b1e87',
        'info_dict': {
            'id': '2456',
            'ext': 'mp4',
            'title': 'New experience with Acrobat DC',
            'description': 'New experience with Acrobat DC',
            'duration': 248.667,
        },
    }

    def _real_extract(self, url):
        """Extract formats and metadata from the ``bridge`` JSON embedded in the page.

        ``title`` is required (``KeyError`` when absent). ``duration`` is
        ``None`` when no source reports one, and ``thumbnail`` is ``None``
        when the ``video`` member is missing or null.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_data = self._parse_json(self._search_regex(
            r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id)
        title = video_data['title']

        formats = []
        sources = video_data.get('sources') or []
        for source in sources:
            source_src = source.get('src')
            if not source_src:
                continue
            formats.append({
                'filesize': int_or_none(source.get('kilobytes') or None, invscale=1000),
                'format_id': join_nonempty(source.get('format'), source.get('label')),
                'height': int_or_none(source.get('height') or None),
                'tbr': int_or_none(source.get('bitrate') or None),
                'width': int_or_none(source.get('width') or None),
                'url': source_src,
            })
        self._sort_formats(formats)

        # The duration varies among formats, both in the metadata and in the
        # downloaded files, so the largest reported value is used.
        # `default=None` keeps max() from raising ValueError when no source
        # carries a duration (or there are no sources at all).
        duration = max(filter(None, (
            float_or_none(source.get('duration'), scale=1000)
            for source in sources)), default=None)

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': video_data.get('description'),
            # `or {}` also covers an explicit null `video` member.
            'thumbnail': (video_data.get('video') or {}).get('poster'),
            'duration': duration,
            'subtitles': self._parse_subtitles(video_data, 'vttPath'),
        }