]>
Commit | Line | Data |
---|---|---|
58bb4402 S |
1 | import re |
2 | ||
ef4fd848 | 3 | from .common import InfoExtractor |
5c2266df S |
4 | from ..utils import ( |
5 | ExtractorError, | |
7ded6545 | 6 | float_or_none, |
29f7c58a | 7 | int_or_none, |
8 | try_get, | |
58bb4402 | 9 | unescapeHTML, |
5c2266df | 10 | ) |
ef4fd848 PH |
11 | |
12 | ||
class WistiaBaseIE(InfoExtractor):
    """Shared helpers for Wistia extractors: embed config download and media parsing."""

    _VALID_ID_REGEX = r'(?P<id>[a-z0-9]{10})'
    _VALID_URL_BASE = r'https?://(?:fast\.)?wistia\.(?:net|com)/embed/'
    _EMBED_BASE_URL = 'http://fast.wistia.com/embed/'

    def _download_embed_config(self, config_type, config_id, referer):
        """Download the JSON embed config for a Wistia resource.

        config_type is the singular resource name; it is pluralised with a
        trailing "s" to build the URL (callers in this file pass 'media' and
        'playlist'). config_id is the resource id. referer is sent as the
        Referer header when it starts with 'http'; otherwise the config URL
        itself (without '.json') is sent instead.

        Returns the decoded JSON. Raises ExtractorError when the response is
        a dict carrying a truthy 'error' value.
        """
        base_url = self._EMBED_BASE_URL + '%ss/%s' % (config_type, config_id)
        embed_config = self._download_json(
            base_url + '.json', config_id, headers={
                'Referer': referer if referer.startswith('http') else base_url,  # Some videos require this.
            })

        if isinstance(embed_config, dict) and embed_config.get('error'):
            # Name the resource that actually failed instead of always
            # claiming it was a playlist.
            raise ExtractorError(
                'Error while getting the %s' % config_type, expected=True)

        return embed_config

    def _extract_media(self, embed_config):
        """Build an info dict from an embed config containing a 'media' entry.

        Requires embed_config['media'] with 'hashedId', 'name' and 'assets';
        a missing key raises KeyError. Returns a dict with id, title,
        description, formats, thumbnails, duration, timestamp and subtitles.
        """
        data = embed_config['media']
        video_id = data['hashedId']
        title = data['name']

        formats = []
        thumbnails = []
        for a in data['assets']:
            aurl = a.get('url')
            if not aurl:
                continue
            astatus = a.get('status')
            atype = a.get('type')
            # Assets whose status is present and not 2 are skipped
            # (presumably 2 means "ready" -- TODO confirm), as are
            # preview and storyboard assets.
            if (astatus is not None and astatus != 2) or atype in ('preview', 'storyboard'):
                continue
            elif atype in ('still', 'still_image'):
                thumbnails.append({
                    'url': aurl,
                    'width': int_or_none(a.get('width')),
                    'height': int_or_none(a.get('height')),
                    'filesize': int_or_none(a.get('size')),
                })
            else:
                aext = a.get('ext')
                display_name = a.get('display_name')
                format_id = atype
                if atype and atype.endswith('_video') and display_name:
                    # Strip the trailing '_video' (6 characters) from the type.
                    format_id = '%s-%s' % (atype[:-6], display_name)
                f = {
                    'format_id': format_id,
                    'url': aurl,
                    # A bitrate of 0 is treated as unknown.
                    'tbr': int_or_none(a.get('bitrate')) or None,
                    'quality': 1 if atype == 'original' else None,
                }
                if display_name == 'Audio':
                    f.update({
                        'vcodec': 'none',
                    })
                else:
                    f.update({
                        'width': int_or_none(a.get('width')),
                        'height': int_or_none(a.get('height')),
                        'vcodec': a.get('codec'),
                    })
                if a.get('container') == 'm3u8' or aext == 'm3u8':
                    # An HLS asset yields two formats: a '.ts' variant of the
                    # URL and the m3u8 playlist itself.
                    ts_f = f.copy()
                    ts_f.update({
                        'ext': 'ts',
                        # format_id is None when the asset has no type; do not
                        # call .replace() on it in that case.
                        'format_id': (
                            format_id.replace('hls-', 'ts-')
                            if format_id else format_id),
                        'url': aurl.replace('.bin', '.ts'),
                    })
                    formats.append(ts_f)
                    f.update({
                        'ext': 'mp4',
                        'protocol': 'm3u8_native',
                    })
                else:
                    f.update({
                        'container': a.get('container'),
                        'ext': aext,
                        'filesize': int_or_none(a.get('size')),
                    })
                formats.append(f)

        self._sort_formats(formats)

        subtitles = {}
        # 'captions' may be absent or explicitly null; treat both as empty.
        for caption in data.get('captions') or []:
            language = caption.get('language')
            if not language:
                continue
            subtitles[language] = [{
                'url': self._EMBED_BASE_URL + 'captions/' + video_id + '.vtt?language=' + language,
            }]

        return {
            'id': video_id,
            'title': title,
            'description': data.get('seoDescription'),
            'formats': formats,
            'thumbnails': thumbnails,
            'duration': float_or_none(data.get('duration')),
            'timestamp': int_or_none(data.get('createdAt')),
            'subtitles': subtitles,
        }
29f7c58a | 116 | |
117 | ||
class WistiaIE(WistiaBaseIE):
    """Extractor for a single Wistia media, addressed by embed URL or 'wistia:<id>'."""

    _VALID_URL = r'(?:wistia:|%s(?:iframe|medias)/)%s' % (WistiaBaseIE._VALID_URL_BASE, WistiaBaseIE._VALID_ID_REGEX)

    _TESTS = [{
        # with hls video
        'url': 'wistia:807fafadvk',
        'md5': 'daff0f3687a41d9a71b40e0e8c2610fe',
        'info_dict': {
            'id': '807fafadvk',
            'ext': 'mp4',
            'title': 'Drip Brennan Dunn Workshop',
            'description': 'a JV Webinars video',
            'upload_date': '20160518',
            'timestamp': 1463607249,
            'duration': 4987.11,
        },
    }, {
        'url': 'wistia:sh7fpupwlt',
        'only_matching': True,
    }, {
        'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
        'only_matching': True,
    }, {
        'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt',
        'only_matching': True,
    }, {
        'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json',
        'only_matching': True,
    }]

    # https://wistia.com/support/embed-and-share/video-on-your-website
    @staticmethod
    def _extract_url(webpage):
        """Return the first Wistia reference found in webpage, or None."""
        found = WistiaIE._extract_urls(webpage)
        if not found:
            return None
        return found[0]

    @staticmethod
    def _extract_urls(webpage):
        """Return every Wistia reference found in webpage, in match order.

        Three patterns are scanned one after another: embed URLs in
        meta/iframe/script tags (returned HTML-unescaped), divs with a
        'wistia_async_<id>' class, and id-bearing attributes or
        Wistia.embed() calls. The last two yield 'wistia:<id>' strings.
        Duplicates are not removed.
        """
        embed_url_re = r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})'
        async_div_re = r'''(?sx)
                    <div[^>]+class=(["'])(?:(?!\1).)*?\bwistia_async_(?P<id>[a-z0-9]{10})\b(?:(?!\1).)*?\1
                '''
        inline_id_re = r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})'

        found = [
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(embed_url_re, webpage)]
        found.extend(
            'wistia:%s' % mobj.group('id')
            for mobj in re.finditer(async_div_re, webpage))
        found.extend(
            'wistia:%s' % mobj.group('id')
            for mobj in re.finditer(inline_id_re, webpage))
        return found

    def _real_extract(self, url):
        """Fetch the media embed config for url and convert it to an info dict."""
        media_id = self._match_id(url)
        return self._extract_media(
            self._download_embed_config('media', media_id, url))
173 | ||
174 | ||
class WistiaPlaylistIE(WistiaBaseIE):
    """Extractor for Wistia playlist embed URLs."""

    _VALID_URL = r'%splaylists/%s' % (WistiaIE._VALID_URL_BASE, WistiaIE._VALID_ID_REGEX)

    _TEST = {
        'url': 'https://fast.wistia.net/embed/playlists/aodt9etokc',
        'info_dict': {
            'id': 'aodt9etokc',
        },
        'playlist_count': 3,
    }

    def _real_extract(self, url):
        """Return a playlist result with one entry per media that has an embed config."""
        playlist_id = self._match_id(url)
        playlist = self._download_embed_config('playlist', playlist_id, url)

        # Medias are read from the first element of the response; a missing
        # or differently shaped response gives an empty playlist.
        medias = try_get(playlist, lambda x: x[0]['medias']) or []
        # Lazy on purpose: each config is looked up right before it is used.
        embed_configs = (media.get('embed_config') for media in medias)
        entries = [
            self._extract_media(embed_config)
            for embed_config in embed_configs if embed_config]

        return self.playlist_result(entries, playlist_id)