]>
Commit | Line | Data |
---|---|---|
65cfa2b0 | 1 | import re |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..utils import ( | |
5 | ExtractorError, | |
6 | determine_ext, | |
7 | float_or_none, | |
8 | int_or_none, | |
9 | js_to_json, | |
10 | traverse_obj, | |
11 | url_or_none, | |
12 | ) | |
13 | ||
14 | ||
class MuseAIIE(InfoExtractor):
    """Extractor for muse.ai videos addressed by ``/v/<id>`` or ``/embed/<id>`` URLs."""

    # Only the leading run of word characters is captured as the id; anything
    # after it (e.g. the "-0756" suffix in the second test URL) is ignored.
    _VALID_URL = r'https?://(?:www\.)?muse\.ai/(?:v|embed)/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://muse.ai/embed/YdTWvUW',
        'md5': 'f994f9a38be1c3aaf9e37cbd7d76fe7c',
        'info_dict': {
            'id': 'YdTWvUW',
            'ext': 'mp4',
            'title': '2023-05-28-Grabien-1941111 (1)',
            'description': '',
            'uploader': 'Today News Africa',
            'uploader_id': 'TodayNewsAfrica',
            'upload_date': '20230528',
            'timestamp': 1685285044,
            'duration': 1291.3,
            'view_count': int,
            'availability': 'public',
        },
    }, {
        'url': 'https://muse.ai/v/gQ4gGAA-0756',
        'md5': '52dbfc78e865e56dc19a1715badc35e8',
        'info_dict': {
            'id': 'gQ4gGAA',
            'ext': 'mp4',
            'title': '0756',
            'description': 'md5:0ca1483f9aac423e9a96ad00bb3a0785',
            'uploader': 'Aerial.ie',
            'uploader_id': 'aerial',
            'upload_date': '20210306',
            'timestamp': 1615072842,
            'duration': 21.4,
            'view_count': int,
            'availability': 'public',
        },
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://muse.ai/docs',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'docs',
            'title': 'muse.ai | docs',
            'description': 'md5:6c0293431481582739c82ee8902687fa',
            'age_limit': 0,
            'thumbnail': 'https://muse.ai/static/imgs/poster-img-docs.png',
        },
        'params': {'allowed_extractors': ['all', '-html5']},
    }]
    # <iframe> embeds whose src points at a muse.ai embed page
    _EMBED_REGEX = [r'<iframe[^>]*\bsrc=["\'](?P<url>https://muse\.ai/embed/\w+)']

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield muse.ai embed URLs found in *webpage*.

        Yields everything the parent implementation yields, followed by one
        embed URL for each inline ``<script>`` block that calls
        ``MusePlayer({...})`` with a quoted ``video:`` id.
        """
        yield from super()._extract_embed_urls(url, webpage)
        for embed_id in re.findall(r'<script>[^<]*\bMusePlayer\(\{[^}<]*\bvideo:\s*["\'](\w+)["\']', webpage):
            yield f'https://muse.ai/embed/{embed_id}'

    def _real_extract(self, url):
        """Build the info dict from the embed page's ``player.setData(...)`` payload.

        Raises:
            ExtractorError: if the payload carries no usable ``url`` entry.
        """
        video_id = self._match_id(url)
        # The embed page is fetched regardless of which URL form was passed in
        webpage = self._download_webpage(f'https://muse.ai/embed/{video_id}', video_id)
        data = self._search_json(
            r'player\.setData\(', webpage, 'player data', video_id, transform_source=js_to_json)

        # Use .get() so that a payload without a 'url' key reaches the
        # ExtractorError below instead of escaping as a bare KeyError
        source_url = data.get('url')
        if not url_or_none(source_url):
            raise ExtractorError('Unable to extract video URL')

        formats = [{
            'url': source_url,
            'format_id': 'source',
            'quality': 1,
            **traverse_obj(data, {
                'ext': ('filename', {determine_ext}),
                'width': ('width', {int_or_none}),
                'height': ('height', {int_or_none}),
                'filesize': ('size', {int_or_none}),
            }),
        }]
        # When the source URL ends in '/data', HLS and DASH manifests are
        # looked up under the sibling '/videos' path; both lookups are
        # non-fatal, so the source format alone is enough to succeed
        if source_url.endswith('/data'):
            base_url = f'{source_url[:-5]}/videos'
            formats.extend(self._extract_m3u8_formats(
                f'{base_url}/hls.m3u8', video_id, m3u8_id='hls', fatal=False))
            formats.extend(self._extract_mpd_formats(
                f'{base_url}/dash.mpd', video_id, mpd_id='dash', fatal=False))

        return {
            'id': video_id,
            'formats': formats,
            **traverse_obj(data, {
                'title': ('title', {str}),
                'description': ('description', {str}),
                'duration': ('duration', {float_or_none}),
                'timestamp': ('tcreated', {int_or_none}),
                'uploader': ('owner_name', {str}),
                'uploader_id': ('owner_username', {str}),
                'view_count': ('views', {int_or_none}),
                # A truthy 'mature' flag maps to 18; otherwise no age limit is reported
                'age_limit': ('mature', {lambda x: 18 if x else None}),
                # Any visibility value other than 'private'/'unlisted' is reported as 'public'
                'availability': ('visibility', {lambda x: x if x in ('private', 'unlisted') else 'public'}),
            }),
        }