]>
Commit | Line | Data |
---|---|---|
1 | from .common import InfoExtractor | |
2 | from ..utils import traverse_obj | |
3 | ||
4 | ||
class CellebriteIE(InfoExtractor):
    """Extractor for cellebrite.com pages whose video is served by a Vidyard player."""

    _VALID_URL = r'https?://cellebrite\.com/(?:\w+)?/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://cellebrite.com/en/collect-data-from-android-devices-with-cellebrite-ufed/',
        'info_dict': {
            'id': '16025876',
            'ext': 'mp4',
            'description': 'md5:174571cb97083fd1d457d75c684f4e2b',
            'thumbnail': 'https://cellebrite.com/wp-content/uploads/2021/05/Chat-Capture-1024x559.png',
            'title': 'Ask the Expert: Chat Capture - Collect Data from Android Devices in Cellebrite UFED',
            'duration': 455,
            'tags': [],
        }
    }, {
        'url': 'https://cellebrite.com/en/how-to-lawfully-collect-the-maximum-amount-of-data-from-android-devices/',
        'info_dict': {
            'id': '29018255',
            'ext': 'mp4',
            'duration': 134,
            'tags': [],
            'description': 'md5:e9a3d124c7287b0b07bad2547061cacf',
            'thumbnail': 'https://cellebrite.com/wp-content/uploads/2022/07/How-to-Lawfully-Collect-the-Maximum-Amount-of-Data-From-Android-Devices.png',
            'title': 'Android Extractions Explained',
        }
    }]

    def _get_formats_and_subtitles(self, json_data, display_id):
        """Collect formats and subtitles from a player ``sources`` mapping.

        Every URL found under ``mp4`` becomes a bare ``{'url': ...}`` format.
        Every URL found under ``hls`` is expanded as an m3u8 manifest, and the
        resulting formats and subtitles are merged into the output.

        Returns a ``(formats, subtitles)`` tuple.
        """
        # Direct MP4 links carry no extra metadata here; only the URL is kept.
        formats = []
        for mp4_url in traverse_obj(json_data, ('mp4', ..., 'url')) or []:
            formats.append({'url': mp4_url})

        subtitles = {}
        for manifest_url in traverse_obj(json_data, ('hls', ..., 'url')) or []:
            # The manifest request is sent with the Vidyard player as Referer.
            hls_formats, hls_subtitles = self._extract_m3u8_formats_and_subtitles(
                manifest_url, display_id, ext='mp4',
                headers={'Referer': 'https://play.vidyard.com/'})
            formats += hls_formats
            self._merge_subtitles(hls_subtitles, target=subtitles)

        return formats, subtitles

    def _real_extract(self, url):
        """Build the info dict for a cellebrite.com page.

        The page is scanned for an ``<img>`` tag carrying a ``data-uuid``
        attribute; that UUID selects the Vidyard player JSON, whose first
        chapter supplies the sources and metadata. Title and description fall
        back to the page's OpenGraph tags when the chapter lacks them.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Capture stops at a closing quote or a '?', dropping any query suffix.
        player_uuid = self._search_regex(
            r'<img\s[^>]*\bdata-uuid\s*=\s*"([^"\?]+)', webpage, 'player UUID')
        player_json = self._download_json(
            f'https://play.vidyard.com/player/{player_uuid}.json', display_id)
        # Only the first chapter of the payload is used.
        chapter = player_json['payload']['chapters'][0]

        formats, subtitles = self._get_formats_and_subtitles(chapter['sources'], display_id)

        # Evaluated in the same order as the keys of the returned dict.
        video_id = str(chapter['videoId'])
        title = chapter.get('name') or self._og_search_title(webpage)
        description = chapter.get('description') or self._og_search_description(webpage)
        duration = chapter.get('seconds')
        tags = chapter.get('tags')
        thumbnail = self._og_search_thumbnail(webpage)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'description': description,
            'duration': duration,
            'tags': tags,
            'thumbnail': thumbnail,
            'http_headers': {'Referer': 'https://play.vidyard.com/'},
        }