]>
Commit | Line | Data |
---|---|---|
1362bbbb S |
1 | from __future__ import unicode_literals |
2 | ||
7f641d2c | 3 | import functools |
30bd1c16 | 4 | import re |
5 | ||
1362bbbb | 6 | from .common import InfoExtractor |
7079f8ff | 7 | from ..compat import compat_str |
1362bbbb | 8 | from ..utils import ( |
607841af | 9 | float_or_none, |
7f641d2c | 10 | int_or_none, |
607841af | 11 | ISO639Utils, |
34921b43 | 12 | join_nonempty, |
7f641d2c RA |
13 | OnDemandPagedList, |
14 | parse_duration, | |
15 | str_or_none, | |
16 | str_to_int, | |
17 | unified_strdate, | |
1362bbbb S |
18 | ) |
19 | ||
20 | ||
class AdobeTVBaseIE(InfoExtractor):
    """Shared API access and metadata parsing for the Adobe TV extractors."""

    def _call_api(self, path, video_id, query, note=None):
        """Request ``path`` from the v4 API and return the ``data`` member of the JSON reply.

        ``query`` is sent as the URL query; ``note`` is forwarded to
        ``_download_json`` as the progress message.
        """
        return self._download_json(
            'http://tv.adobe.com/api/v4/' + path,
            video_id, note, query=query)['data']

    def _parse_subtitles(self, video_data, url_key):
        """Collect VTT subtitle tracks from ``video_data['translations']``.

        ``url_key`` is the name of the translation field holding the VTT URL.
        Returns a dict mapping a language code to a list of
        ``{'ext': 'vtt', 'url': ...}`` entries.  Translations lacking a URL or
        a resolvable language code are skipped.
        """
        subtitles = {}
        # `or []` also covers an explicit null, which `.get(key, [])` would
        # hand straight to the for loop.
        for translation in video_data.get('translations') or []:
            vtt_path = translation.get(url_key)
            if not vtt_path:
                continue
            lang = translation.get('language_w3c')
            if not lang:
                # Fall back to the long language name, if there is one
                language_medium = translation.get('language_medium')
                if language_medium:
                    lang = ISO639Utils.long2short(language_medium)
            if not lang:
                # Never file a track under a None key
                continue
            subtitles.setdefault(lang, []).append({
                'ext': 'vtt',
                'url': vtt_path,
            })
        return subtitles

    def _parse_video_data(self, video_data):
        """Turn one episode dict from the API into an info dict.

        ``id`` and ``title`` are mandatory (KeyError if missing); every other
        field is optional.
        """
        video_id = compat_str(video_data['id'])
        title = video_data['title']

        # The s3 original is added at most once, from the first source that has one
        s3_extracted = False
        formats = []
        for source in video_data.get('videos') or []:
            source_url = source.get('url')
            if not source_url:
                continue
            f = {
                'format_id': source.get('quality_level'),
                'fps': int_or_none(source.get('frame_rate')),
                'height': int_or_none(source.get('height')),
                'tbr': int_or_none(source.get('video_data_rate')),
                'width': int_or_none(source.get('width')),
                'url': source_url,
            }
            original_filename = source.get('original_filename')
            if original_filename:
                if not (f.get('height') and f.get('width')):
                    # Recover the dimensions from a "_<width>x<height>" part of the filename
                    mobj = re.search(r'_(\d+)x(\d+)', original_filename)
                    if mobj:
                        f.update({
                            'height': int(mobj.group(2)),
                            'width': int(mobj.group(1)),
                        })
                if original_filename.startswith('s3://') and not s3_extracted:
                    formats.append({
                        'format_id': 'original',
                        'quality': 1,
                        # count=1: rewrite only the scheme prefix, not any
                        # later "s3://" inside the key
                        'url': original_filename.replace('s3://', 'https://s3.amazonaws.com/', 1),
                    })
                    s3_extracted = True
            formats.append(f)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': video_data.get('description'),
            'thumbnail': video_data.get('thumbnail'),
            'upload_date': unified_strdate(video_data.get('start_date')),
            'duration': parse_duration(video_data.get('duration')),
            'view_count': str_to_int(video_data.get('playcount')),
            'formats': formats,
            'subtitles': self._parse_subtitles(video_data, 'vtt'),
        }
88 | ||
89 | ||
class AdobeTVEmbedIE(AdobeTVBaseIE):
    """Extractor for ``tv.adobe.com/embed/<number>/<id>`` URLs."""

    IE_NAME = 'adobetv:embed'
    _VALID_URL = r'https?://tv\.adobe\.com/embed/\d+/(?P<id>\d+)'
    _TEST = {
        'url': 'https://tv.adobe.com/embed/22/4153',
        'md5': 'c8c0461bf04d54574fc2b4d07ac6783a',
        'info_dict': {
            'id': '4153',
            'ext': 'flv',
            'title': 'Creating Graphics Optimized for BlackBerry',
            'description': 'md5:eac6e8dced38bdaae51cd94447927459',
            'thumbnail': r're:https?://.*\.jpg$',
            'upload_date': '20091109',
            'duration': 377,
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Fetch the episode by the numeric id in the URL and parse the first API result."""
        episode_id = self._match_id(url)
        api_query = {'disclosure': 'standard'}
        episodes = self._call_api('episode/' + episode_id, episode_id, api_query)
        return self._parse_video_data(episodes[0])
2c3b9f35 | 114 | |
115 | ||
class AdobeTVIE(AdobeTVBaseIE):
    """Extractor for ``tv.adobe.com/[<language>/]watch/<show>/<episode>`` URLs."""

    IE_NAME = 'adobetv'
    _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
        'md5': '9bc5727bcdd55251f35ad311ca74fa1e',
        'info_dict': {
            'id': '10981',
            'ext': 'mp4',
            'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
            'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
            'thumbnail': r're:https?://.*\.jpg$',
            'upload_date': '20110914',
            'duration': 60,
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Look the episode up by show and episode URL names and parse the first API result."""
        mobj = self._match_valid_url(url)
        urlname = mobj.group('id')
        api_query = {
            'disclosure': 'standard',
            # URLs without a language prefix are requested as English
            'language': mobj.group('language') or 'en',
            'show_urlname': mobj.group('show_urlname'),
            'urlname': urlname,
        }
        episodes = self._call_api('episode/get', urlname, api_query)
        return self._parse_video_data(episodes[0])
607841af YCH |
148 | |
149 | ||
class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
    """Paged playlist helper.

    Subclasses provide ``_RESOURCE`` (the API path to page through) and
    ``_process_data`` (converts one API element into a playlist entry).
    """

    _PAGE_SIZE = 25

    def _fetch_page(self, display_id, query, page):
        """Yield the processed elements of one API page.

        ``page`` is incremented by one before it is sent as the ``page`` query
        parameter.  ``query`` is left untouched.
        """
        page += 1
        # Build a per-page copy: the same query dict is bound into every page
        # fetch via functools.partial and still belongs to the caller, so it
        # must not be modified in place.
        page_query = dict(query, page=page)
        for element_data in self._call_api(
                self._RESOURCE, display_id, page_query, 'Download Page %d' % page):
            yield self._process_data(element_data)

    def _extract_playlist_entries(self, display_id, query):
        """Return a lazily evaluated paged list of entries for ``query``."""
        return OnDemandPagedList(functools.partial(
            self._fetch_page, display_id, query), self._PAGE_SIZE)
9a605c88 | 163 | |
164 | ||
class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
    """Extractor for ``tv.adobe.com/[<language>/]show/<show>`` URLs, returned as a playlist."""

    IE_NAME = 'adobetv:show'
    _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://tv.adobe.com/show/the-complete-picture-with-julieanne-kost',
        'info_dict': {
            'id': '36',
            'title': 'The Complete Picture with Julieanne Kost',
            'description': 'md5:fa50867102dcd1aa0ddf2ab039311b27',
        },
        'playlist_mincount': 136,
    }
    # Page through the 'episode' resource and parse each element as a video
    _RESOURCE = 'episode'
    _process_data = AdobeTVBaseIE._parse_video_data

    def _real_extract(self, url):
        """Fetch the show metadata and return its episodes as a paged playlist."""
        mobj = self._match_valid_url(url)
        show_urlname = mobj.group('id')
        api_query = {
            'disclosure': 'standard',
            # URLs without a language prefix are requested as English
            'language': mobj.group('language') or 'en',
            'show_urlname': show_urlname,
        }

        shows = self._call_api('show/get', show_urlname, api_query)
        show_data = shows[0]

        entries = self._extract_playlist_entries(show_urlname, api_query)
        playlist_id = str_or_none(show_data.get('id'))
        return self.playlist_result(
            entries, playlist_id,
            show_data.get('show_name'), show_data.get('show_description'))
9a605c88 | 199 | |
200 | ||
class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
    """Extractor for ``tv.adobe.com/[<language>/]channel/<channel>[/<category>]`` URLs."""

    IE_NAME = 'adobetv:channel'
    _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))?'

    _TEST = {
        'url': 'http://tv.adobe.com/channel/development',
        'info_dict': {
            'id': 'development',
        },
        'playlist_mincount': 96,
    }
    # Page through the 'show' resource
    _RESOURCE = 'show'

    def _process_data(self, show_data):
        """Wrap one show element as a URL result handled by the AdobeTVShow extractor."""
        show_url = show_data['url']
        show_id = str_or_none(show_data.get('id'))
        return self.url_result(show_url, 'AdobeTVShow', show_id)

    def _real_extract(self, url):
        """Return the shows of a channel, optionally narrowed to a category, as a playlist."""
        mobj = self._match_valid_url(url)
        channel_urlname = mobj.group('id')
        category_urlname = mobj.group('category_urlname')

        api_query = {
            'channel_urlname': channel_urlname,
            # URLs without a language prefix are requested as English
            'language': mobj.group('language') or 'en',
        }
        if category_urlname:
            api_query['category_urlname'] = category_urlname

        entries = self._extract_playlist_entries(channel_urlname, api_query)
        return self.playlist_result(entries, channel_urlname)
232 | ||
233 | ||
7f641d2c RA |
class AdobeTVVideoIE(AdobeTVBaseIE):
    """Extractor for ``video.tv.adobe.com/v/<id>`` pages."""

    IE_NAME = 'adobetv:video'
    _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'

    _TEST = {
        # From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
        'url': 'https://video.tv.adobe.com/v/2456/',
        'md5': '43662b577c018ad707a63766462b1e87',
        'info_dict': {
            'id': '2456',
            'ext': 'mp4',
            'title': 'New experience with Acrobat DC',
            'description': 'New experience with Acrobat DC',
            'duration': 248.667,
        },
    }

    def _real_extract(self, url):
        """Parse the ``bridge`` JSON object embedded in the page into an info dict.

        ``title`` is mandatory (KeyError if missing); ``duration`` is None
        when no source reports one.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_data = self._parse_json(self._search_regex(
            r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id)
        title = video_data['title']

        formats = []
        sources = video_data.get('sources') or []
        for source in sources:
            source_src = source.get('src')
            if not source_src:
                continue
            formats.append({
                'filesize': int_or_none(source.get('kilobytes') or None, invscale=1000),
                'format_id': join_nonempty(source.get('format'), source.get('label')),
                'height': int_or_none(source.get('height') or None),
                'tbr': int_or_none(source.get('bitrate') or None),
                'width': int_or_none(source.get('width') or None),
                'url': source_src,
            })
        self._sort_formats(formats)

        # For both metadata and downloaded files the duration varies among
        # formats, so pick the largest one.  max() raises ValueError on an
        # empty sequence, hence the explicit fallback to None when there are
        # no sources or none of them carries a duration.
        durations = [
            duration for duration in (
                float_or_none(source.get('duration'), scale=1000)
                for source in sources)
            if duration]
        duration = max(durations) if durations else None

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': video_data.get('description'),
            # `or {}` also covers an explicit null 'video' member
            'thumbnail': (video_data.get('video') or {}).get('poster'),
            'duration': duration,
            'subtitles': self._parse_subtitles(video_data, 'vttPath'),
        }