]>
Commit | Line | Data |
---|---|---|
1 | import functools | |
2 | import re | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
6 | ISO639Utils, | |
7 | OnDemandPagedList, | |
8 | float_or_none, | |
9 | int_or_none, | |
10 | join_nonempty, | |
11 | parse_duration, | |
12 | str_or_none, | |
13 | str_to_int, | |
14 | unified_strdate, | |
15 | ) | |
16 | ||
17 | ||
class AdobeTVBaseIE(InfoExtractor):
    """Shared API access and metadata parsing for the Adobe TV extractors."""

    def _call_api(self, path, video_id, query, note=None):
        """Request ``path`` from the v4 API and return the ``data`` member of the JSON reply.

        ``query`` is sent as the URL query; ``note`` is forwarded to
        ``_download_json`` as its third positional argument.
        """
        return self._download_json(
            'http://tv.adobe.com/api/v4/' + path,
            video_id, note, query=query)['data']

    def _parse_subtitles(self, video_data, url_key):
        """Collect WebVTT subtitles from ``video_data['translations']``.

        ``url_key`` is the name of the field in each translation that holds
        the subtitle URL. Returns a dict mapping a language code to a list
        of ``{'ext': 'vtt', 'url': ...}`` entries. Translations without a
        URL or without any usable language code are skipped.
        """
        subtitles = {}
        # ``or []`` also covers a present-but-null ``translations`` value.
        for translation in video_data.get('translations') or []:
            vtt_path = translation.get(url_key)
            if not vtt_path:
                continue
            lang = translation.get('language_w3c')
            if not lang:
                language_medium = translation.get('language_medium')
                if not language_medium:
                    # No language information at all; a subtitle track
                    # keyed by ``None`` would be useless downstream.
                    continue
                # Keep the raw code if the conversion yields nothing.
                lang = ISO639Utils.long2short(language_medium) or language_medium
            subtitles.setdefault(lang, []).append({
                'ext': 'vtt',
                'url': vtt_path,
            })
        return subtitles

    def _parse_video_data(self, video_data):
        """Convert one episode dict from the API into an info dict.

        ``video_data`` must contain ``id`` and ``title`` (KeyError
        otherwise); every other field is optional.
        """
        video_id = str(video_data['id'])
        title = video_data['title']

        s3_extracted = False
        formats = []
        # ``or []`` also covers a present-but-null ``videos`` value.
        for source in video_data.get('videos') or []:
            source_url = source.get('url')
            if not source_url:
                continue
            f = {
                'format_id': source.get('quality_level'),
                'fps': int_or_none(source.get('frame_rate')),
                'height': int_or_none(source.get('height')),
                'tbr': int_or_none(source.get('video_data_rate')),
                'width': int_or_none(source.get('width')),
                'url': source_url,
            }
            original_filename = source.get('original_filename')
            if original_filename:
                if not (f.get('height') and f.get('width')):
                    # Recover the dimensions from a ``_<width>x<height>``
                    # fragment of the file name when the API omits them.
                    mobj = re.search(r'_(\d+)x(\d+)', original_filename)
                    if mobj:
                        f.update({
                            'height': int(mobj.group(2)),
                            'width': int(mobj.group(1)),
                        })
                # Only the first s3:// file name is exposed as the
                # 'original' format; it is added ahead of the source's own
                # format entry.
                if original_filename.startswith('s3://') and not s3_extracted:
                    formats.append({
                        'format_id': 'original',
                        'quality': 1,
                        'url': original_filename.replace('s3://', 'https://s3.amazonaws.com/'),
                    })
                    s3_extracted = True
            formats.append(f)

        return {
            'id': video_id,
            'title': title,
            'description': video_data.get('description'),
            'thumbnail': video_data.get('thumbnail'),
            'upload_date': unified_strdate(video_data.get('start_date')),
            'duration': parse_duration(video_data.get('duration')),
            'view_count': str_to_int(video_data.get('playcount')),
            'formats': formats,
            'subtitles': self._parse_subtitles(video_data, 'vtt'),
        }
84 | ||
85 | ||
class AdobeTVEmbedIE(AdobeTVBaseIE):
    """Extractor for ``tv.adobe.com/embed/<number>/<id>`` URLs."""

    IE_NAME = 'adobetv:embed'
    _VALID_URL = r'https?://tv\.adobe\.com/embed/\d+/(?P<id>\d+)'
    _TEST = {
        'url': 'https://tv.adobe.com/embed/22/4153',
        'md5': 'c8c0461bf04d54574fc2b4d07ac6783a',
        'info_dict': {
            'id': '4153',
            'ext': 'flv',
            'title': 'Creating Graphics Optimized for BlackBerry',
            'description': 'md5:eac6e8dced38bdaae51cd94447927459',
            'thumbnail': r're:https?://.*\.jpg$',
            'upload_date': '20091109',
            'duration': 377,
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Look the episode up by its numeric id and parse the first result."""
        episode_id = self._match_id(url)
        episodes = self._call_api(
            f'episode/{episode_id}', episode_id, {'disclosure': 'standard'})
        return self._parse_video_data(episodes[0])
110 | ||
111 | ||
class AdobeTVIE(AdobeTVBaseIE):
    """Extractor for ``tv.adobe.com/[<language>/]watch/<show>/<episode>`` URLs."""

    IE_NAME = 'adobetv'
    _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
        'md5': '9bc5727bcdd55251f35ad311ca74fa1e',
        'info_dict': {
            'id': '10981',
            'ext': 'mp4',
            'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
            'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
            'thumbnail': r're:https?://.*\.jpg$',
            'upload_date': '20110914',
            'duration': 60,
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Resolve the episode by show and episode URL names and parse the first result."""
        mobj = self._match_valid_url(url)
        urlname = mobj.group('id')
        api_query = {
            'disclosure': 'standard',
            # URLs without a language prefix are queried as English.
            'language': mobj.group('language') or 'en',
            'show_urlname': mobj.group('show_urlname'),
            'urlname': urlname,
        }
        episodes = self._call_api('episode/get', urlname, api_query)
        return self._parse_video_data(episodes[0])
144 | ||
145 | ||
class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
    """Base for paged playlist extractors.

    Subclasses are expected to provide ``_RESOURCE`` (the API path to page
    through) and ``_process_data`` (turns one API element into an entry).
    """

    _PAGE_SIZE = 25

    def _fetch_page(self, display_id, query, page):
        """Yield the processed entries of one result page.

        ``page`` is the zero-based index handed over by the paged list; the
        API is queried with ``page + 1``.
        """
        page += 1
        # Build a per-page copy so the caller's ``query`` dict, which is
        # shared between all page fetches, is never mutated.
        page_query = {**query, 'page': page}
        for element_data in self._call_api(
                self._RESOURCE, display_id, page_query, f'Download Page {page}'):
            yield self._process_data(element_data)

    def _extract_playlist_entries(self, display_id, query):
        """Return a lazily evaluated paged list of entries for ``query``."""
        return OnDemandPagedList(functools.partial(
            self._fetch_page, display_id, query), self._PAGE_SIZE)
159 | ||
160 | ||
class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
    """Playlist extractor for ``tv.adobe.com/[<language>/]show/<show>`` URLs."""

    IE_NAME = 'adobetv:show'
    _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://tv.adobe.com/show/the-complete-picture-with-julieanne-kost',
        'info_dict': {
            'id': '36',
            'title': 'The Complete Picture with Julieanne Kost',
            'description': 'md5:fa50867102dcd1aa0ddf2ab039311b27',
        },
        'playlist_mincount': 136,
    }
    # Pages of the 'episode' resource are parsed as individual videos.
    _RESOURCE = 'episode'
    _process_data = AdobeTVBaseIE._parse_video_data

    def _real_extract(self, url):
        """Fetch the show's metadata and return its episodes as a playlist."""
        mobj = self._match_valid_url(url)
        show_urlname = mobj.group('id')
        query = {
            'disclosure': 'standard',
            # URLs without a language prefix are queried as English.
            'language': mobj.group('language') or 'en',
            'show_urlname': show_urlname,
        }

        show_data = self._call_api('show/get', show_urlname, query)[0]
        entries = self._extract_playlist_entries(show_urlname, query)

        return self.playlist_result(
            entries,
            str_or_none(show_data.get('id')),
            show_data.get('show_name'),
            show_data.get('show_description'))
195 | ||
196 | ||
class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
    """Playlist extractor for ``tv.adobe.com/[<language>/]channel/<channel>[/<category>]`` URLs."""

    IE_NAME = 'adobetv:channel'
    _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))?'

    _TEST = {
        'url': 'http://tv.adobe.com/channel/development',
        'info_dict': {
            'id': 'development',
        },
        'playlist_mincount': 96,
    }
    _RESOURCE = 'show'

    def _process_data(self, show_data):
        """Turn one show element into a URL result handled by ``AdobeTVShow``."""
        show_url = show_data['url']
        show_id = str_or_none(show_data.get('id'))
        return self.url_result(show_url, 'AdobeTVShow', show_id)

    def _real_extract(self, url):
        """Return the channel's shows (optionally limited to a category) as a playlist."""
        mobj = self._match_valid_url(url)
        channel_urlname = mobj.group('id')
        query = {
            'channel_urlname': channel_urlname,
            # URLs without a language prefix are queried as English.
            'language': mobj.group('language') or 'en',
        }
        category_urlname = mobj.group('category_urlname')
        if category_urlname:
            query['category_urlname'] = category_urlname

        entries = self._extract_playlist_entries(channel_urlname, query)
        return self.playlist_result(entries, channel_urlname)
228 | ||
229 | ||
class AdobeTVVideoIE(AdobeTVBaseIE):
    """Extractor for ``video.tv.adobe.com/v/<id>`` player pages."""

    IE_NAME = 'adobetv:video'
    _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
    _EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]']

    _TEST = {
        # From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
        'url': 'https://video.tv.adobe.com/v/2456/',
        'md5': '43662b577c018ad707a63766462b1e87',
        'info_dict': {
            'id': '2456',
            'ext': 'mp4',
            'title': 'New experience with Acrobat DC',
            'description': 'New experience with Acrobat DC',
            'duration': 248.667,
        },
    }

    def _real_extract(self, url):
        """Parse the ``bridge`` JSON object embedded in the player page.

        The page must define ``var bridge = {...};`` and that object must
        contain ``title``; all other fields are optional.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_data = self._parse_json(self._search_regex(
            r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id)
        title = video_data['title']

        formats = []
        sources = video_data.get('sources') or []
        for source in sources:
            source_src = source.get('src')
            if not source_src:
                continue
            formats.append({
                # ``or None`` turns falsy placeholders (0, '') into None
                # before the numeric conversion.
                'filesize': int_or_none(source.get('kilobytes') or None, invscale=1000),
                'format_id': join_nonempty(source.get('format'), source.get('label')),
                'height': int_or_none(source.get('height') or None),
                'tbr': int_or_none(source.get('bitrate') or None),
                'width': int_or_none(source.get('width') or None),
                'url': source_src,
            })

        # For both metadata and downloaded files the duration varies among
        # formats, so the largest one is used. ``default=None`` keeps the
        # extraction alive when no source reports a usable duration
        # (a bare ``max`` raises ValueError on an empty sequence).
        durations = [
            float_or_none(source.get('duration'), scale=1000)
            for source in sources]
        duration = max(filter(None, durations), default=None)

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': video_data.get('description'),
            # ``or {}`` also covers a present-but-null ``video`` value.
            'thumbnail': (video_data.get('video') or {}).get('poster'),
            'duration': duration,
            'subtitles': self._parse_subtitles(video_data, 'vttPath'),
        }