]>
Commit | Line | Data |
---|---|---|
e423e0ba S |
1 | from __future__ import unicode_literals |
2 | ||
58bb4402 S |
3 | import re |
4 | ||
ef4fd848 | 5 | from .common import InfoExtractor |
5c2266df S |
6 | from ..utils import ( |
7 | ExtractorError, | |
7ded6545 | 8 | float_or_none, |
29f7c58a | 9 | int_or_none, |
10 | try_get, | |
58bb4402 | 11 | unescapeHTML, |
5c2266df | 12 | ) |
ef4fd848 PH |
13 | |
14 | ||
class WistiaBaseIE(InfoExtractor):
    """Shared helpers for the Wistia extractors.

    Provides the URL/ID regex fragments used to build ``_VALID_URL`` in the
    subclasses, the download of a Wistia "embed config" JSON document and the
    conversion of such a config into an info dict.
    """

    _VALID_ID_REGEX = r'(?P<id>[a-z0-9]{10})'
    _VALID_URL_BASE = r'https?://(?:fast\.)?wistia\.(?:net|com)/embed/'
    _EMBED_BASE_URL = 'http://fast.wistia.com/embed/'

    def _download_embed_config(self, config_type, config_id, referer):
        """Download and return the JSON embed config for a Wistia resource.

        config_type -- singular resource kind (the callers in this file pass
                       'media' or 'playlist'); an 's' is appended to build
                       the URL path segment.
        config_id   -- the resource id, also used as the download's video id.
        referer     -- URL the embed was found on; it is sent as the Referer
                       header only when it starts with 'http', otherwise
                       (e.g. for 'wistia:<id>' pseudo URLs) the config URL
                       itself is sent.

        Raises ExtractorError (expected) when the response is a dict that
        carries a truthy 'error' entry.
        """
        base_url = self._EMBED_BASE_URL + '%ss/%s' % (config_type, config_id)
        embed_config = self._download_json(
            base_url + '.json', config_id, headers={
                'Referer': referer if referer.startswith('http') else base_url,  # Some videos require this.
            })

        # Playlist configs are indexed like a list by the caller, hence the
        # isinstance check before looking for the error marker.
        if isinstance(embed_config, dict) and embed_config.get('error'):
            # Name the resource that actually failed instead of always
            # claiming it was a playlist.
            raise ExtractorError(
                'Error while getting the %s' % config_type, expected=True)

        return embed_config

    def _extract_media(self, embed_config):
        """Build an info dict from a media embed config.

        embed_config -- dict with a 'media' entry that must contain
                        'hashedId', 'name' and 'assets'; 'captions',
                        'seoDescription', 'duration' and 'createdAt' are
                        optional.

        Returns a dict with id, title, description, formats, thumbnails,
        duration, timestamp and subtitles.
        """
        data = embed_config['media']
        video_id = data['hashedId']
        title = data['name']

        formats = []
        thumbnails = []
        for a in data['assets']:
            aurl = a.get('url')
            if not aurl:
                continue
            astatus = a.get('status')
            atype = a.get('type')
            # Skip assets whose status is set to anything but 2 (presumably
            # "ready" -- TODO confirm) as well as preview/storyboard assets.
            if (astatus is not None and astatus != 2) or atype in ('preview', 'storyboard'):
                continue
            elif atype in ('still', 'still_image'):
                thumbnails.append({
                    'url': aurl,
                    'width': int_or_none(a.get('width')),
                    'height': int_or_none(a.get('height')),
                    'filesize': int_or_none(a.get('size')),
                })
            else:
                aext = a.get('ext')
                display_name = a.get('display_name')
                format_id = atype
                if atype and atype.endswith('_video') and display_name:
                    # Drop the 6-character '_video' suffix and append the
                    # display name instead.
                    format_id = '%s-%s' % (atype[:-6], display_name)
                f = {
                    'format_id': format_id,
                    'url': aurl,
                    # "or None" turns a bitrate of 0 into "unknown".
                    'tbr': int_or_none(a.get('bitrate')) or None,
                    'quality': 1 if atype == 'original' else None,
                }
                if display_name == 'Audio':
                    f.update({
                        'vcodec': 'none',
                    })
                else:
                    f.update({
                        'width': int_or_none(a.get('width')),
                        'height': int_or_none(a.get('height')),
                        'vcodec': a.get('codec'),
                    })
                if a.get('container') == 'm3u8' or aext == 'm3u8':
                    # For HLS assets additionally offer a 'ts' variant whose
                    # URL has '.bin' swapped for '.ts'.
                    ts_f = f.copy()
                    ts_f.update({
                        'ext': 'ts',
                        'format_id': f['format_id'].replace('hls-', 'ts-'),
                        'url': f['url'].replace('.bin', '.ts'),
                    })
                    formats.append(ts_f)
                    f.update({
                        'ext': 'mp4',
                        'protocol': 'm3u8_native',
                    })
                else:
                    f.update({
                        'container': a.get('container'),
                        'ext': aext,
                        'filesize': int_or_none(a.get('size')),
                    })
                formats.append(f)

        self._sort_formats(formats)

        subtitles = {}
        # "or []" also covers a 'captions' key that is present but null;
        # dict.get's default only applies when the key is missing.
        for caption in data.get('captions') or []:
            language = caption.get('language')
            if not language:
                continue
            subtitles[language] = [{
                'url': self._EMBED_BASE_URL + 'captions/' + video_id + '.vtt?language=' + language,
            }]

        return {
            'id': video_id,
            'title': title,
            'description': data.get('seoDescription'),
            'formats': formats,
            'thumbnails': thumbnails,
            'duration': float_or_none(data.get('duration')),
            'timestamp': int_or_none(data.get('createdAt')),
            'subtitles': subtitles,
        }
29f7c58a | 118 | |
119 | ||
class WistiaIE(WistiaBaseIE):
    """Extractor for a single Wistia media.

    Handles 'wistia:<id>' pseudo URLs as well as iframe/medias embed URLs.
    """

    _VALID_URL = r'(?:wistia:|%s(?:iframe|medias)/)%s' % (WistiaBaseIE._VALID_URL_BASE, WistiaBaseIE._VALID_ID_REGEX)

    _TESTS = [{
        # with hls video
        'url': 'wistia:807fafadvk',
        'md5': 'daff0f3687a41d9a71b40e0e8c2610fe',
        'info_dict': {
            'id': '807fafadvk',
            'ext': 'mp4',
            'title': 'Drip Brennan Dunn Workshop',
            'description': 'a JV Webinars video',
            'upload_date': '20160518',
            'timestamp': 1463607249,
            'duration': 4987.11,
        },
    }, {
        'url': 'wistia:sh7fpupwlt',
        'only_matching': True,
    }, {
        'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
        'only_matching': True,
    }, {
        'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt',
        'only_matching': True,
    }, {
        'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json',
        'only_matching': True,
    }]

    # https://wistia.com/support/embed-and-share/video-on-your-website
    @staticmethod
    def _extract_url(webpage):
        """Return the first Wistia URL found in webpage, or None if there is none."""
        return next(iter(WistiaIE._extract_urls(webpage)), None)

    @staticmethod
    def _extract_urls(webpage):
        """Return every Wistia URL embedded in webpage.

        The result lists, in this order: direct iframe/script/meta embed
        URLs (HTML-unescaped), then 'wistia:<id>' URLs for async embed divs,
        then 'wistia:<id>' URLs for data-wistia-id / Wistia.embed() /
        id="wistia_<id>" occurrences. Duplicates are not removed.
        """
        found = []

        # Direct embeds: <meta content=...>, <iframe src=...>, <script src=...>.
        direct_matches = re.finditer(
            r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage)
        found.extend(unescapeHTML(m.group('url')) for m in direct_matches)

        # Async embeds: a <div> whose class attribute holds wistia_async_<id>.
        async_matches = re.finditer(
            r'''(?sx)
                    <div[^>]+class=(["'])(?:(?!\1).)*?\bwistia_async_(?P<id>[a-z0-9]{10})\b(?:(?!\1).)*?\1
                ''', webpage)
        found.extend('wistia:%s' % m.group('id') for m in async_matches)

        # Remaining ways an id can show up in the markup or scripts.
        id_matches = re.finditer(
            r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage)
        found.extend('wistia:%s' % m.group('id') for m in id_matches)

        return found

    def _real_extract(self, url):
        """Fetch the media embed config for url and convert it to an info dict."""
        media_id = self._match_id(url)
        return self._extract_media(
            self._download_embed_config('media', media_id, url))
175 | ||
176 | ||
class WistiaPlaylistIE(WistiaBaseIE):
    """Extractor for Wistia playlist embeds."""

    _VALID_URL = r'%splaylists/%s' % (WistiaIE._VALID_URL_BASE, WistiaIE._VALID_ID_REGEX)

    _TEST = {
        'url': 'https://fast.wistia.net/embed/playlists/aodt9etokc',
        'info_dict': {
            'id': 'aodt9etokc',
        },
        'playlist_count': 3,
    }

    def _real_extract(self, url):
        """Return a playlist result built from the playlist's embedded media configs.

        Only the 'medias' of the first element of the downloaded playlist
        config are considered; medias without an 'embed_config' are skipped.
        """
        playlist_id = self._match_id(url)
        playlist = self._download_embed_config('playlist', playlist_id, url)

        medias = try_get(playlist, lambda x: x[0]['medias']) or []
        # Lazy generator: each config is looked up right before it is used,
        # so the lookup/extract order matches a plain loop.
        embed_configs = (media.get('embed_config') for media in medias)
        entries = [
            self._extract_media(embed_config)
            for embed_config in embed_configs
            if embed_config
        ]

        return self.playlist_result(entries, playlist_id)