]>
Commit | Line | Data |
---|---|---|
1362bbbb S |
1 | from __future__ import unicode_literals |
2 | ||
7f641d2c | 3 | import functools |
30bd1c16 | 4 | import re |
5 | ||
1362bbbb | 6 | from .common import InfoExtractor |
7079f8ff | 7 | from ..compat import compat_str |
1362bbbb | 8 | from ..utils import ( |
607841af | 9 | float_or_none, |
7f641d2c | 10 | int_or_none, |
607841af | 11 | ISO639Utils, |
7f641d2c RA |
12 | OnDemandPagedList, |
13 | parse_duration, | |
14 | str_or_none, | |
15 | str_to_int, | |
16 | unified_strdate, | |
1362bbbb S |
17 | ) |
18 | ||
19 | ||
class AdobeTVBaseIE(InfoExtractor):
    """Shared API, subtitle and format helpers for the Adobe TV extractors."""

    def _call_api(self, path, video_id, query, note=None):
        """Download a v4 API resource as JSON and return its ``data`` member.

        ``path`` is appended to the API root, ``query`` is sent as the query
        string and ``note`` is forwarded to ``_download_json`` as the
        progress message.
        """
        return self._download_json(
            'http://tv.adobe.com/api/v4/' + path,
            video_id, note, query=query)['data']

    def _parse_subtitles(self, video_data, url_key):
        """Build a subtitles dict from ``video_data['translations']``.

        ``url_key`` names the field of each translation entry that holds the
        WebVTT URL. Returns a dict mapping language code to a list of
        ``{'ext': 'vtt', 'url': ...}`` entries.
        """
        subtitles = {}
        # ``or []`` also covers a key that is present but null.
        for translation in video_data.get('translations') or []:
            vtt_path = translation.get(url_key)
            if not vtt_path:
                continue
            # Prefer the ready-made W3C code; otherwise convert the long
            # code. Either field may be absent.
            lang = translation.get('language_w3c') or ISO639Utils.long2short(
                translation.get('language_medium'))
            if not lang:
                # Without a language there is no usable key to file the
                # track under, so skip it instead of storing it under None.
                continue
            subtitles.setdefault(lang, []).append({
                'ext': 'vtt',
                'url': vtt_path,
            })
        return subtitles

    def _parse_video_data(self, video_data):
        """Convert one API episode object into an info dict.

        ``video_data`` must contain ``id`` and ``title``; every other field
        is optional.
        """
        video_id = compat_str(video_data['id'])
        title = video_data['title']

        s3_extracted = False
        formats = []
        # ``or []`` also covers a key that is present but null.
        for source in video_data.get('videos') or []:
            source_url = source.get('url')
            if not source_url:
                continue
            f = {
                'format_id': source.get('quality_level'),
                'fps': int_or_none(source.get('frame_rate')),
                'height': int_or_none(source.get('height')),
                'tbr': int_or_none(source.get('video_data_rate')),
                'width': int_or_none(source.get('width')),
                'url': source_url,
            }
            original_filename = source.get('original_filename')
            if original_filename:
                if not (f.get('height') and f.get('width')):
                    # Fall back to a ``_<width>x<height>`` marker in the
                    # original file name.
                    mobj = re.search(r'_(\d+)x(\d+)', original_filename)
                    if mobj:
                        f.update({
                            'height': int(mobj.group(2)),
                            'width': int(mobj.group(1)),
                        })
                # Expose the S3-hosted original once, as an extra format.
                if original_filename.startswith('s3://') and not s3_extracted:
                    formats.append({
                        'format_id': 'original',
                        'preference': 1,
                        'url': original_filename.replace('s3://', 'https://s3.amazonaws.com/'),
                    })
                    s3_extracted = True
            formats.append(f)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': video_data.get('description'),
            'thumbnail': video_data.get('thumbnail'),
            'upload_date': unified_strdate(video_data.get('start_date')),
            'duration': parse_duration(video_data.get('duration')),
            'view_count': str_to_int(video_data.get('playcount')),
            'formats': formats,
            'subtitles': self._parse_subtitles(video_data, 'vtt'),
        }
87 | ||
88 | ||
class AdobeTVEmbedIE(AdobeTVBaseIE):
    """Extractor for ``tv.adobe.com/embed/<n>/<id>`` URLs."""

    IE_NAME = 'adobetv:embed'
    _VALID_URL = r'https?://tv\.adobe\.com/embed/\d+/(?P<id>\d+)'
    _TEST = {
        'url': 'https://tv.adobe.com/embed/22/4153',
        'md5': 'c8c0461bf04d54574fc2b4d07ac6783a',
        'info_dict': {
            'id': '4153',
            'ext': 'flv',
            'title': 'Creating Graphics Optimized for BlackBerry',
            'description': 'md5:eac6e8dced38bdaae51cd94447927459',
            'thumbnail': r're:https?://.*\.jpg$',
            'upload_date': '20091109',
            'duration': 377,
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Look the episode up by its numeric id and return its info dict."""
        episode_id = self._match_id(url)
        api_query = {'disclosure': 'standard'}
        # The first element of the returned data is the requested episode.
        episodes = self._call_api(
            'episode/' + episode_id, episode_id, api_query)
        return self._parse_video_data(episodes[0])
2c3b9f35 | 113 | |
114 | ||
class AdobeTVIE(AdobeTVBaseIE):
    """Extractor for ``tv.adobe.com/[<lang>/]watch/<show>/<episode>`` URLs."""

    IE_NAME = 'adobetv'
    _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
        'md5': '9bc5727bcdd55251f35ad311ca74fa1e',
        'info_dict': {
            'id': '10981',
            'ext': 'mp4',
            'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
            'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
            'thumbnail': r're:https?://.*\.jpg$',
            'upload_date': '20110914',
            'duration': 60,
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Resolve the episode by show/episode URL names and return its info dict."""
        mobj = re.match(self._VALID_URL, url)
        urlname = mobj.group('id')
        api_query = {
            'disclosure': 'standard',
            # URLs without a language prefix are queried as English.
            'language': mobj.group('language') or 'en',
            'show_urlname': mobj.group('show_urlname'),
            'urlname': urlname,
        }
        episodes = self._call_api('episode/get', urlname, api_query)
        return self._parse_video_data(episodes[0])
607841af YCH |
147 | |
148 | ||
class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
    """Paged playlist support for the Adobe TV extractors.

    This class reads ``self._RESOURCE`` (the API path to page through) and
    calls ``self._process_data(element)`` on each returned element; it
    defines neither itself, so subclasses have to supply both.
    """

    _PAGE_SIZE = 25

    def _fetch_page(self, display_id, query, page):
        """Yield the processed entries of one zero-based ``page``."""
        page += 1  # the API is queried with one-based page numbers
        # Work on a copy so the caller's query dict, which is shared by
        # every page request, is never modified.
        page_query = dict(query)
        page_query['page'] = page
        for element_data in self._call_api(
                self._RESOURCE, display_id, page_query,
                'Download Page %d' % page):
            yield self._process_data(element_data)

    def _extract_playlist_entries(self, display_id, query):
        """Return a lazily evaluated, paged list of playlist entries."""
        return OnDemandPagedList(functools.partial(
            self._fetch_page, display_id, query), self._PAGE_SIZE)
9a605c88 | 162 | |
163 | ||
class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
    """Extractor for ``tv.adobe.com/[<lang>/]show/<show>`` URLs."""

    IE_NAME = 'adobetv:show'
    _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://tv.adobe.com/show/the-complete-picture-with-julieanne-kost',
        'info_dict': {
            'id': '36',
            'title': 'The Complete Picture with Julieanne Kost',
            'description': 'md5:fa50867102dcd1aa0ddf2ab039311b27',
        },
        'playlist_mincount': 136,
    }
    # Each page element of the 'episode' resource is parsed as a video.
    _RESOURCE = 'episode'
    _process_data = AdobeTVBaseIE._parse_video_data

    def _real_extract(self, url):
        """Return a playlist of the show's episodes with the show metadata."""
        mobj = re.match(self._VALID_URL, url)
        show_urlname = mobj.group('id')
        query = {
            'disclosure': 'standard',
            # URLs without a language prefix are queried as English.
            'language': mobj.group('language') or 'en',
            'show_urlname': show_urlname,
        }

        shows = self._call_api('show/get', show_urlname, query)
        show_data = shows[0]

        entries = self._extract_playlist_entries(show_urlname, query)
        playlist_id = str_or_none(show_data.get('id'))
        playlist_title = show_data.get('show_name')
        playlist_description = show_data.get('show_description')
        return self.playlist_result(
            entries, playlist_id, playlist_title, playlist_description)
9a605c88 | 198 | |
199 | ||
class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
    """Extractor for ``tv.adobe.com/[<lang>/]channel/<channel>[/<category>]`` URLs."""

    IE_NAME = 'adobetv:channel'
    _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))?'

    _TEST = {
        'url': 'http://tv.adobe.com/channel/development',
        'info_dict': {
            'id': 'development',
        },
        'playlist_mincount': 96,
    }
    _RESOURCE = 'show'

    def _process_data(self, show_data):
        """Turn one show element into a URL result for the show extractor."""
        show_id = str_or_none(show_data.get('id'))
        return self.url_result(show_data['url'], 'AdobeTVShow', show_id)

    def _real_extract(self, url):
        """Return a playlist of the shows in the channel (and category, if given)."""
        mobj = re.match(self._VALID_URL, url)
        channel_urlname = mobj.group('id')
        category_urlname = mobj.group('category_urlname')
        query = {
            'channel_urlname': channel_urlname,
            # URLs without a language prefix are queried as English.
            'language': mobj.group('language') or 'en',
        }
        # The category filter is only sent when the URL names one.
        if category_urlname:
            query['category_urlname'] = category_urlname

        entries = self._extract_playlist_entries(channel_urlname, query)
        return self.playlist_result(entries, channel_urlname)
231 | ||
232 | ||
7f641d2c RA |
class AdobeTVVideoIE(AdobeTVBaseIE):
    """Extractor for ``video.tv.adobe.com/v/<id>`` player pages."""

    IE_NAME = 'adobetv:video'
    _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'

    _TEST = {
        # From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
        'url': 'https://video.tv.adobe.com/v/2456/',
        'md5': '43662b577c018ad707a63766462b1e87',
        'info_dict': {
            'id': '2456',
            'ext': 'mp4',
            'title': 'New experience with Acrobat DC',
            'description': 'New experience with Acrobat DC',
            'duration': 248.667,
        },
    }

    def _real_extract(self, url):
        """Parse the ``bridge`` JSON object embedded in the page into an info dict."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_data = self._parse_json(self._search_regex(
            r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id)
        title = video_data['title']

        formats = []
        sources = video_data.get('sources') or []
        for source in sources:
            source_src = source.get('src')
            if not source_src:
                continue
            formats.append({
                'filesize': int_or_none(source.get('kilobytes') or None, invscale=1000),
                # An empty join result is turned into None rather than ''.
                'format_id': '-'.join(filter(None, [source.get('format'), source.get('label')])) or None,
                'height': int_or_none(source.get('height') or None),
                'tbr': int_or_none(source.get('bitrate') or None),
                'width': int_or_none(source.get('width') or None),
                'url': source_src,
            })
        self._sort_formats(formats)

        # For both metadata and downloaded files the duration varies among
        # formats, so the largest one is picked. max() raises ValueError on
        # an empty sequence, hence the explicit fallback to None when no
        # source reports a duration.
        durations = [
            duration for duration in (
                float_or_none(source.get('duration'), scale=1000)
                for source in sources)
            if duration]
        duration = max(durations) if durations else None

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': video_data.get('description'),
            # ``or {}`` also covers a 'video' key that is present but null.
            'thumbnail': (video_data.get('video') or {}).get('poster'),
            'duration': duration,
            'subtitles': self._parse_subtitles(video_data, 'vttPath'),
        }