]>
Commit | Line | Data |
---|---|---|
d6c2c2bc | 1 | import base64 |
2 | import urllib.parse | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..networking.exceptions import HTTPError | |
6 | from ..utils import ( | |
7 | ExtractorError, | |
8 | int_or_none, | |
9 | qualities, | |
10 | remove_start, | |
11 | smuggle_url, | |
12 | unsmuggle_url, | |
13 | update_url_query, | |
14 | url_or_none, | |
15 | urlencode_postdata, | |
16 | ) | |
17 | from ..utils.traversal import traverse_obj | |
18 | ||
19 | ||
class SproutVideoIE(InfoExtractor):
    """Extractor for SproutVideo embed URLs (``//videos.sproutvideo.com/embed/<id>/<hash>``).

    The embed page carries a base64-encoded JSON blob (``var dat = '...'``) that
    this extractor decodes and reads formats, subtitles and metadata from.
    """

    # Scheme-less embed URL pattern, shared by _VALID_URL and _EMBED_REGEX
    _NO_SCHEME_RE = r'//videos\.sproutvideo\.com/embed/(?P<id>[\da-f]+)/[\da-f]+'
    _VALID_URL = rf'https?:{_NO_SCHEME_RE}'
    _EMBED_REGEX = [rf'<iframe [^>]*\bsrc=["\'](?P<url>(?:https?:)?{_NO_SCHEME_RE}[^"\']*)["\']']
    _TESTS = [{
        'url': 'https://videos.sproutvideo.com/embed/4c9dddb01910e3c9c4/0fc24387c4f24ee3',
        'md5': '1343ce1a6cb39d67889bfa07c7b02b0e',
        'info_dict': {
            'id': '4c9dddb01910e3c9c4',
            'ext': 'mp4',
            'title': 'Adrien Labaeye : Berlin, des communautés aux communs',
            'duration': 576,
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
        },
    }, {
        'url': 'https://videos.sproutvideo.com/embed/a79fdcb21f1be2c62e/93bf31e41e39ca27',
        'md5': 'cebae5cf558cca83271917cf4ec03f26',
        'info_dict': {
            'id': 'a79fdcb21f1be2c62e',
            'ext': 'mp4',
            'title': 'HS_01_Live Stream 2023-01-14 10:00',
            'duration': 703,
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
        },
    }, {
        # http formats 'sd' and 'hd' are available
        'url': 'https://videos.sproutvideo.com/embed/119cd6bc1a18e6cd98/30751a1761ae5b90',
        'md5': 'f368c78df07e78a749508b221528672c',
        'info_dict': {
            'id': '119cd6bc1a18e6cd98',
            'ext': 'mp4',
            'title': '3. Updating your Partner details',
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
            'duration': 60,
        },
        'params': {'format': 'hd'},
    }, {
        # subtitles
        'url': 'https://videos.sproutvideo.com/embed/119dd8ba121ee0cc98/4ee50c88a343215d?type=hd',
        'md5': '7f6798f037d7a3e3e07e67959de68fc6',
        'info_dict': {
            'id': '119dd8ba121ee0cc98',
            'ext': 'mp4',
            'title': 'Recipients Setup - Domestic Wire Only',
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
            'duration': 77,
            'subtitles': {'en': 'count:1'},
        },
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.solidarum.org/vivre-ensemble/adrien-labaeye-berlin-des-communautes-aux-communs',
        'info_dict': {
            'id': '4c9dddb01910e3c9c4',
            'ext': 'mp4',
            'title': 'Adrien Labaeye : Berlin, des communautés aux communs',
            'duration': 576,
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
        },
    }]
    # Filled from the decoded page data via str.format(**data)
    _M3U8_URL_TMPL = 'https://{base}.videos.sproutvideo.com/{s3_user_hash}/{s3_video_hash}/video/index.m3u8'
    _QUALITIES = ('hd', 'uhd', 'source')  # Exclude 'sd' to prioritize hls formats above it

    @staticmethod
    def _policy_to_qs(policy, signature_key, as_string=False):
        """Build the signed query parameters for one signature set.

        Reads ``policy['signatures'][signature_key]`` and passes each key
        through ``remove_start(key, 'CloudFront-')``, then adds
        ``policy['sessionID']`` under ``'sessionID'``.

        Returns the query as a dict, or as a urlencoded string when
        ``as_string`` is true. Raises ``KeyError`` if ``signatures``,
        ``signature_key`` or ``sessionID`` is missing from ``policy``.
        """
        query = {}
        for key, value in policy['signatures'][signature_key].items():
            query[remove_start(key, 'CloudFront-')] = value
        query['sessionID'] = policy['sessionID']
        return urllib.parse.urlencode(query, doseq=True) if as_string else query

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield the embed URLs found by the parent implementation, made absolute.

        Scheme-relative matches (``//...``) get an ``https:`` prefix, and the
        embedding page's ``url`` is smuggled along as ``referer`` so that
        ``_real_extract`` can send it as the Referer header.
        """
        for embed_url in super()._extract_embed_urls(url, webpage):
            if embed_url.startswith('//'):
                embed_url = f'https:{embed_url}'
            yield smuggle_url(embed_url, {'referer': url})

    def _real_extract(self, url):
        """Download the embed page, decode its data blob and build the info dict."""
        url, smuggled_data = unsmuggle_url(url, {})
        video_id = self._match_id(url)
        # Forward the smuggled referer (if any) as the Referer header
        webpage = self._download_webpage(
            url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'}))
        # The page assigns a base64 string to `var dat`; decode it before JSON parsing
        data = self._search_json(
            r'var\s+dat\s*=\s*["\']', webpage, 'data', video_id, contains_pattern=r'[A-Za-z0-9+/=]+',
            end_pattern=r'["\'];', transform_source=lambda x: base64.b64decode(x).decode())

        formats, subtitles = [], {}
        headers = {
            'Accept': '*/*',
            'Origin': 'https://videos.sproutvideo.com',
            'Referer': url,
        }

        # HLS extraction is fatal; only attempt it if the JSON data says it's available
        if traverse_obj(data, 'hls'):
            manifest_query = self._policy_to_qs(data, 'm')
            fragment_query = self._policy_to_qs(data, 't', as_string=True)
            key_query = self._policy_to_qs(data, 'k', as_string=True)

            hls_formats = self._extract_m3u8_formats(
                self._M3U8_URL_TMPL.format(**data), video_id, 'mp4',
                m3u8_id='hls', headers=headers, query=manifest_query)
            # Patch only the HLS formats: each one gets the manifest query on its
            # URL plus the separate fragment and key queries
            for fmt in hls_formats:
                fmt.update({
                    'url': update_url_query(fmt['url'], manifest_query),
                    'extra_param_to_segment_url': fragment_query,
                    'extra_param_to_key_url': key_query,
                })
            formats.extend(hls_formats)

        # Direct http downloads: keep only (format_id, url) pairs whose value is a valid URL
        if downloads := traverse_obj(data, ('downloads', {dict.items}, lambda _, v: url_or_none(v[1]))):
            quality = qualities(self._QUALITIES)
            # Mark as video-only solely when the data explicitly says has_audio is False
            acodec = 'none' if data.get('has_audio') is False else None
            formats.extend([{
                'format_id': str(format_id),
                'url': format_url,
                'ext': 'mp4',
                'quality': quality(format_id),
                'acodec': acodec,
            } for format_id, format_url in downloads])

        for sub_data in traverse_obj(data, ('subtitleData', lambda _, v: url_or_none(v['src']))):
            # Fall back to 'en' when srclang is missing, null or empty, so the
            # subtitle language key is always a non-empty value
            lang = sub_data.get('srclang') or 'en'
            subtitles.setdefault(lang, []).append({
                'url': sub_data['src'],
            })

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'http_headers': headers,
            **traverse_obj(data, {
                'title': ('title', {str}),
                'duration': ('duration', {int_or_none}),
                'thumbnail': ('posterframe_url', {url_or_none}),
            }),
        }
156 | ||
157 | ||
class VidsIoIE(InfoExtractor):
    """Extractor for ``*.vids.io/videos/<id>/<display_id>`` pages.

    The page is expected to contain a SproutVideo embed iframe; extraction is
    delegated to SproutVideoIE once that embed URL has been found.
    """

    IE_NAME = 'vids.io'
    _VALID_URL = r'https?://[\w-]+\.vids\.io/videos/(?P<id>[\da-f]+)/(?P<display_id>[\w-]+)'
    _TESTS = [{
        'url': 'https://how-to-video.vids.io/videos/799cd8b11c10efc1f0/how-to-video-live-streaming',
        'md5': '9bbbb2c0c0739eb163b80f87b8d77c9e',
        'info_dict': {
            'id': '799cd8b11c10efc1f0',
            'ext': 'mp4',
            'title': 'How to Video: Live Streaming',
            'duration': 2787,
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
        },
    }]

    def _real_extract(self, url):
        """Resolve a vids.io page to its SproutVideo embed, submitting a password if required.

        Raises ExtractorError when the page answers 403 and no ``videopassword``
        param is set, when the password submission is answered with 403 again,
        or when no SproutVideo embed URL is found in the page.
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')
        # 403 is passed as an expected status so that the response can be inspected below
        webpage, urlh = self._download_webpage_handle(url, display_id, expected_status=403)

        if urlh.status == 403:
            password = self.get_param('videopassword')
            if not password:
                raise ExtractorError(
                    'This video is password-protected; use the --video-password option', expected=True)
            try:
                # The password goes first; hidden inputs scraped from the 403 page are merged on top
                form_fields = {'password': password}
                form_fields.update(self._hidden_inputs(webpage))
                webpage = self._download_webpage(
                    url, display_id, 'Submitting video password',
                    data=urlencode_postdata(form_fields))
                # Requests with user's session cookie `_sproutvideo_session` are now authorized
            except ExtractorError as e:
                rejected = isinstance(e.cause, HTTPError) and e.cause.status == 403
                if not rejected:
                    raise
                raise ExtractorError('Incorrect password', expected=True)

        # Only the first embed found in the page is used
        embed_url = next(SproutVideoIE._extract_embed_urls(url, webpage), None)
        if not embed_url:
            raise ExtractorError('Unable to extract any SproutVideo embed url')
        return self.url_result(embed_url, SproutVideoIE, video_id)