]>
Commit | Line | Data |
---|---|---|
7f641d2c | 1 | import functools |
30bd1c16 | 2 | import re |
3 | ||
1362bbbb | 4 | from .common import InfoExtractor |
7079f8ff | 5 | from ..compat import compat_str |
1362bbbb | 6 | from ..utils import ( |
607841af | 7 | float_or_none, |
7f641d2c | 8 | int_or_none, |
607841af | 9 | ISO639Utils, |
34921b43 | 10 | join_nonempty, |
7f641d2c RA |
11 | OnDemandPagedList, |
12 | parse_duration, | |
13 | str_or_none, | |
14 | str_to_int, | |
15 | unified_strdate, | |
1362bbbb S |
16 | ) |
17 | ||
18 | ||
class AdobeTVBaseIE(InfoExtractor):
    """Shared helpers for the Adobe TV extractors.

    Provides API access plus conversion of the API's video and subtitle
    metadata into info-dict form.
    """

    def _call_api(self, path, video_id, query, note=None):
        """Request ``path`` from the v4 API and return the response's ``data`` member.

        ``query`` is sent as the URL query; ``note`` is forwarded to
        ``_download_json`` as the progress message.
        """
        return self._download_json(
            'http://tv.adobe.com/api/v4/' + path,
            video_id, note, query=query)['data']

    def _parse_subtitles(self, video_data, url_key):
        """Collect VTT subtitles from ``video_data['translations']``.

        ``url_key`` is the name of the field in each translation entry that
        holds the subtitle URL. Returns a dict mapping a language code to a
        list of ``{'ext': 'vtt', 'url': ...}`` entries. Entries without a URL
        or without a resolvable language are skipped instead of aborting
        the whole extraction.
        """
        subtitles = {}
        # `or []` also covers a key that is present but null
        for translation in video_data.get('translations') or []:
            vtt_path = translation.get(url_key)
            if not vtt_path:
                continue
            lang = translation.get('language_w3c')
            if not lang:
                # Fall back to converting the 'language_medium' code
                language_medium = translation.get('language_medium')
                if language_medium:
                    lang = ISO639Utils.long2short(language_medium)
            if not lang:
                # Subtitles are optional; never file them under a None key
                continue
            subtitles.setdefault(lang, []).append({
                'ext': 'vtt',
                'url': vtt_path,
            })
        return subtitles

    def _parse_video_data(self, video_data):
        """Turn one API episode object into an info dict.

        ``id`` and ``title`` are mandatory (a missing key raises KeyError);
        every other field is optional.
        """
        video_id = compat_str(video_data['id'])
        title = video_data['title']

        # The s3 "original" format is added at most once per video
        s3_extracted = False
        formats = []
        # `or []` also covers a key that is present but null
        for source in video_data.get('videos') or []:
            source_url = source.get('url')
            if not source_url:
                continue
            f = {
                'format_id': source.get('quality_level'),
                'fps': int_or_none(source.get('frame_rate')),
                'height': int_or_none(source.get('height')),
                'tbr': int_or_none(source.get('video_data_rate')),
                'width': int_or_none(source.get('width')),
                'url': source_url,
            }
            original_filename = source.get('original_filename')
            if original_filename:
                if not (f.get('height') and f.get('width')):
                    # Recover the dimensions from a "_<width>x<height>" marker in the file name
                    mobj = re.search(r'_(\d+)x(\d+)', original_filename)
                    if mobj:
                        f.update({
                            'height': int(mobj.group(2)),
                            'width': int(mobj.group(1)),
                        })
                if original_filename.startswith('s3://') and not s3_extracted:
                    # Expose the s3:// original through its HTTPS address
                    formats.append({
                        'format_id': 'original',
                        'quality': 1,
                        'url': original_filename.replace('s3://', 'https://s3.amazonaws.com/'),
                    })
                    s3_extracted = True
            formats.append(f)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': video_data.get('description'),
            'thumbnail': video_data.get('thumbnail'),
            'upload_date': unified_strdate(video_data.get('start_date')),
            'duration': parse_duration(video_data.get('duration')),
            'view_count': str_to_int(video_data.get('playcount')),
            'formats': formats,
            'subtitles': self._parse_subtitles(video_data, 'vtt'),
        }
86 | ||
87 | ||
class AdobeTVEmbedIE(AdobeTVBaseIE):
    """Extractor for tv.adobe.com ``/embed/<n>/<id>`` URLs."""

    IE_NAME = 'adobetv:embed'
    _VALID_URL = r'https?://tv\.adobe\.com/embed/\d+/(?P<id>\d+)'
    _TEST = {
        'url': 'https://tv.adobe.com/embed/22/4153',
        'md5': 'c8c0461bf04d54574fc2b4d07ac6783a',
        'info_dict': {
            'id': '4153',
            'ext': 'flv',
            'title': 'Creating Graphics Optimized for BlackBerry',
            'description': 'md5:eac6e8dced38bdaae51cd94447927459',
            'thumbnail': r're:https?://.*\.jpg$',
            'upload_date': '20091109',
            'duration': 377,
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Look the episode up by its numeric id and parse the first result."""
        episode_id = self._match_id(url)
        api_path = 'episode/%s' % episode_id
        episodes = self._call_api(
            api_path, episode_id, {'disclosure': 'standard'})
        return self._parse_video_data(episodes[0])
2c3b9f35 | 112 | |
113 | ||
class AdobeTVIE(AdobeTVBaseIE):
    """Extractor for tv.adobe.com ``/watch/<show>/<episode>`` pages."""

    IE_NAME = 'adobetv'
    _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
        'md5': '9bc5727bcdd55251f35ad311ca74fa1e',
        'info_dict': {
            'id': '10981',
            'ext': 'mp4',
            'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
            'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
            'thumbnail': r're:https?://.*\.jpg$',
            'upload_date': '20110914',
            'duration': 60,
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Resolve the episode from its show/episode URL names and parse it."""
        mobj = self._match_valid_url(url)
        urlname = mobj.group('id')
        api_query = {
            'disclosure': 'standard',
            # URLs without a language prefix are treated as English
            'language': mobj.group('language') or 'en',
            'show_urlname': mobj.group('show_urlname'),
            'urlname': urlname,
        }
        episodes = self._call_api('episode/get', urlname, api_query)
        return self._parse_video_data(episodes[0])
607841af YCH |
146 | |
147 | ||
class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
    """Base for paged Adobe TV listings.

    Subclasses provide ``_RESOURCE`` (the API path to page through) and
    ``_process_data`` (converts one API element into a playlist entry).
    """

    # Passed to OnDemandPagedList as the page size
    _PAGE_SIZE = 25

    def _fetch_page(self, display_id, query, page):
        """Yield the processed entries of one result page.

        ``page`` is incremented before being sent to the API. ``query`` is
        left untouched: the page number goes into a per-page copy, because
        the same dict is shared by every page fetch (it is bound through
        ``functools.partial``) and still belongs to the caller.
        """
        page += 1
        page_query = dict(query, page=page)
        for element_data in self._call_api(
                self._RESOURCE, display_id, page_query, 'Download Page %d' % page):
            yield self._process_data(element_data)

    def _extract_playlist_entries(self, display_id, query):
        """Return a lazily evaluated paged list of entries for ``query``."""
        return OnDemandPagedList(functools.partial(
            self._fetch_page, display_id, query), self._PAGE_SIZE)
9a605c88 | 161 | |
162 | ||
class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
    """Playlist extractor for tv.adobe.com ``/show/<name>`` pages."""

    IE_NAME = 'adobetv:show'
    _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://tv.adobe.com/show/the-complete-picture-with-julieanne-kost',
        'info_dict': {
            'id': '36',
            'title': 'The Complete Picture with Julieanne Kost',
            'description': 'md5:fa50867102dcd1aa0ddf2ab039311b27',
        },
        'playlist_mincount': 136,
    }
    # Page through episodes and parse each one as a full video
    _RESOURCE = 'episode'
    _process_data = AdobeTVBaseIE._parse_video_data

    def _real_extract(self, url):
        """Fetch the show's metadata and return its episodes as a playlist."""
        mobj = self._match_valid_url(url)
        show_urlname = mobj.group('id')
        query = {
            'disclosure': 'standard',
            # URLs without a language prefix are treated as English
            'language': mobj.group('language') or 'en',
            'show_urlname': show_urlname,
        }

        show = self._call_api('show/get', show_urlname, query)[0]
        entries = self._extract_playlist_entries(show_urlname, query)

        playlist_id = str_or_none(show.get('id'))
        playlist_title = show.get('show_name')
        playlist_description = show.get('show_description')
        return self.playlist_result(
            entries, playlist_id, playlist_title, playlist_description)
9a605c88 | 197 | |
198 | ||
class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
    """Playlist extractor for tv.adobe.com ``/channel/<name>[/<category>]`` pages."""

    IE_NAME = 'adobetv:channel'
    _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))?'

    _TEST = {
        'url': 'http://tv.adobe.com/channel/development',
        'info_dict': {
            'id': 'development',
        },
        'playlist_mincount': 96,
    }
    # Page through the shows of the channel
    _RESOURCE = 'show'

    def _process_data(self, show_data):
        """Convert one show element into a URL result for the show extractor."""
        show_url = show_data['url']
        show_id = str_or_none(show_data.get('id'))
        return self.url_result(show_url, 'AdobeTVShow', show_id)

    def _real_extract(self, url):
        """Return the channel's shows, optionally limited to one category."""
        mobj = self._match_valid_url(url)
        channel_urlname = mobj.group('id')
        query = {
            'channel_urlname': channel_urlname,
            # URLs without a language prefix are treated as English
            'language': mobj.group('language') or 'en',
        }
        category = mobj.group('category_urlname')
        if category:
            query['category_urlname'] = category

        entries = self._extract_playlist_entries(channel_urlname, query)
        return self.playlist_result(entries, channel_urlname)
230 | ||
231 | ||
7f641d2c RA |
class AdobeTVVideoIE(AdobeTVBaseIE):
    """Extractor for video.tv.adobe.com ``/v/<id>`` player pages.

    Metadata is read from the ``bridge`` JavaScript variable embedded in
    the page rather than from the API.
    """

    IE_NAME = 'adobetv:video'
    _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
    _EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]']

    _TEST = {
        # From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
        'url': 'https://video.tv.adobe.com/v/2456/',
        'md5': '43662b577c018ad707a63766462b1e87',
        'info_dict': {
            'id': '2456',
            'ext': 'mp4',
            'title': 'New experience with Acrobat DC',
            'description': 'New experience with Acrobat DC',
            'duration': 248.667,
        },
    }

    def _real_extract(self, url):
        """Download the player page and build the info dict from its ``bridge`` data.

        ``title`` is mandatory (a missing key raises KeyError); formats,
        duration, thumbnail and subtitles are optional.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_data = self._parse_json(self._search_regex(
            r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id)
        title = video_data['title']

        formats = []
        sources = video_data.get('sources') or []
        for source in sources:
            source_src = source.get('src')
            if not source_src:
                continue
            formats.append({
                # `or None` maps empty/zero values to "unknown"
                'filesize': int_or_none(source.get('kilobytes') or None, invscale=1000),
                'format_id': join_nonempty(source.get('format'), source.get('label')),
                'height': int_or_none(source.get('height') or None),
                'tbr': int_or_none(source.get('bitrate') or None),
                'width': int_or_none(source.get('width') or None),
                'url': source_src,
            })
        self._sort_formats(formats)

        # For both metadata and downloaded files the duration varies among
        # formats, so the largest value is used. `default=None` keeps a page
        # without sources (or without any duration) from raising ValueError.
        duration = max(
            filter(None, (
                float_or_none(source.get('duration'), scale=1000)
                for source in sources)),
            default=None)

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': video_data.get('description'),
            # `or {}` also covers a 'video' key that is present but null
            'thumbnail': (video_data.get('video') or {}).get('poster'),
            'duration': duration,
            'subtitles': self._parse_subtitles(video_data, 'vttPath'),
        }