]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/zoom.py
[cleanup] Misc (#8338)
[yt-dlp.git] / yt_dlp / extractor / zoom.py
1 from .common import InfoExtractor
2 from ..utils import (
3 ExtractorError,
4 int_or_none,
5 str_or_none,
6 js_to_json,
7 parse_filesize,
8 traverse_obj,
9 urlencode_postdata,
10 urljoin,
11 )
12
13
class ZoomIE(InfoExtractor):
    """Extractor for Zoom recording pages (``rec/play`` and ``rec/share`` URLs)."""

    IE_NAME = 'zoom'
    _VALID_URL = r'(?P<base_url>https?://(?:[^.]+\.)?zoom\.us/)rec(?:ording)?/(?P<type>play|share)/(?P<id>[\w.-]+)'
    _TESTS = [{
        'url': 'https://economist.zoom.us/rec/play/dUk_CNBETmZ5VA2BwEl-jjakPpJ3M1pcfVYAPRsoIbEByGsLjUZtaa4yCATQuOL3der8BlTwxQePl_j0.EImBkXzTIaPvdZO5',
        'md5': 'ab445e8c911fddc4f9adc842c2c5d434',
        'info_dict': {
            'id': 'dUk_CNBETmZ5VA2BwEl-jjakPpJ3M1pcfVYAPRsoIbEByGsLjUZtaa4yCATQuOL3der8BlTwxQePl_j0.EImBkXzTIaPvdZO5',
            'ext': 'mp4',
            'title': 'China\'s "two sessions" and the new five-year plan',
        },
        'skip': 'Recording requires email authentication to access',
    }, {
        # play URL
        'url': 'https://ffgolf.zoom.us/rec/play/qhEhXbrxq1Zoucx8CMtHzq1Z_2YZRPVCqWK_K-2FkEGRsSLDeOX8Tu4P6jtjZcRry8QhIbvKZdtr4UNo.QcPn2debFskI9whJ',
        'md5': '2c4b1c4e5213ebf9db293e88d9385bee',
        'info_dict': {
            'id': 'qhEhXbrxq1Zoucx8CMtHzq1Z_2YZRPVCqWK_K-2FkEGRsSLDeOX8Tu4P6jtjZcRry8QhIbvKZdtr4UNo.QcPn2debFskI9whJ',
            'ext': 'mp4',
            'title': 'Prépa AF2023 - Séance 5 du 11 avril - R20/VM/GO',
        },
    }, {
        # share URL
        'url': 'https://us02web.zoom.us/rec/share/hkUk5Zxcga0nkyNGhVCRfzkA2gX_mzgS3LpTxEEWJz9Y_QpIQ4mZFOUx7KZRZDQA.9LGQBdqmDAYgiZ_8',
        'md5': '90fdc7cfcaee5d52d1c817fc03c43c9b',
        'info_dict': {
            'id': 'hkUk5Zxcga0nkyNGhVCRfzkA2gX_mzgS3LpTxEEWJz9Y_QpIQ4mZFOUx7KZRZDQA.9LGQBdqmDAYgiZ_8',
            'ext': 'mp4',
            'title': 'Timea Andrea Lelik\'s Personal Meeting Room',
        },
    }]

    def _get_page_data(self, webpage, video_id):
        """Return the object assigned to ``window.__data__`` in *webpage*.

        The assignment is a JavaScript literal, so it is passed through
        ``js_to_json`` before being parsed.
        """
        return self._search_json(
            r'window\.__data__\s*=', webpage, 'data', video_id, transform_source=js_to_json)

    def _get_real_webpage(self, url, base_url, video_id, url_type):
        """Download *url*, submitting the passcode first when the page asks for one.

        If looking up the hidden inputs of ``password_form`` raises
        ``ExtractorError``, the page is treated as not passcode-protected and
        is returned as downloaded. Otherwise the ``videopassword`` parameter
        is posted to the validation endpoint and the page is downloaded again.

        *url_type* is only used in the progress notes.

        Raises ``ExtractorError`` (expected) when a passcode is required but
        not supplied, or when the server rejects it.
        """
        webpage = self._download_webpage(url, video_id, note=f'Downloading {url_type} webpage')
        try:
            form = self._form_hidden_inputs('password_form', webpage)
        except ExtractorError:
            return webpage

        password = self.get_param('videopassword')
        if not password:
            raise ExtractorError(
                'This video is protected by a passcode, use the --video-password option', expected=True)

        # Meeting-level and file-level passcodes use different endpoints and ID fields
        is_meeting = form.get('useWhichPasswd') == 'meeting'
        endpoint_suffix = '_meet' if is_meeting else ''
        id_field = 'meetId' if is_meeting else 'fileId'
        validation = self._download_json(
            f'{base_url}rec/validate{endpoint_suffix}_passwd',
            video_id, 'Validating passcode', 'Wrong passcode', data=urlencode_postdata({
                'id': form[id_field],
                'passwd': password,
                'action': form.get('action'),
            }))
        if not validation.get('status'):
            # The failure response is not guaranteed to carry a message; fall back
            # to a generic one instead of masking the rejection with a KeyError
            raise ExtractorError(
                validation.get('errorMessage') or 'Passcode validation failed', expected=True)
        return self._download_webpage(url, video_id, note=f'Re-downloading {url_type} webpage')

    def _real_extract(self, url):
        """Build the info dict for a Zoom recording URL.

        ``share`` URLs are first resolved to their ``play`` URL through the
        share-info endpoint; the play page then yields the file ID used to
        fetch the play info JSON, from which formats and subtitles are read.

        Raises ``ExtractorError`` when the play page provides no file ID.
        """
        base_url, url_type, video_id = self._match_valid_url(url).group('base_url', 'type', 'id')

        if url_type == 'share':
            webpage = self._get_real_webpage(url, base_url, video_id, 'share')
            meeting_id = self._get_page_data(webpage, video_id)['meetingId']
            redirect_path = self._download_json(
                f'{base_url}nws/recording/1.0/play/share-info/{meeting_id}',
                video_id, note='Downloading share info JSON')['result']['redirectUrl']
            url = urljoin(base_url, redirect_path)

        webpage = self._get_real_webpage(url, base_url, video_id, 'play')
        # Use .get() so that a missing key takes the same error path as an empty value
        file_id = self._get_page_data(webpage, video_id).get('fileId')
        if not file_id:
            # When things go wrong, file_id can be empty string
            raise ExtractorError('Unable to extract file ID')

        data = self._download_json(
            f'{base_url}nws/recording/1.0/play/info/{file_id}', video_id,
            note='Downloading play info JSON')['result']

        subtitles = {}
        for sub_type in ('transcript', 'cc', 'chapter'):
            sub_url = data.get(f'{sub_type}Url')
            if sub_url:
                subtitles[sub_type] = [{
                    'url': urljoin(base_url, sub_url),
                    'ext': 'vtt',
                }]

        formats = []

        if data.get('viewMp4Url'):
            formats.append({
                'format_note': 'Camera stream',
                'url': str_or_none(data.get('viewMp4Url')),
                # 'Resolvtions' (sic) is the key name this code reads from the JSON
                'width': int_or_none(traverse_obj(data, ('viewResolvtions', 0))),
                'height': int_or_none(traverse_obj(data, ('viewResolvtions', 1))),
                'format_id': str_or_none(traverse_obj(data, ('recording', 'id'))),
                'ext': 'mp4',
                'filesize_approx': parse_filesize(str_or_none(traverse_obj(data, ('recording', 'fileSizeInMB')))),
                'preference': 0,
            })

        if data.get('shareMp4Url'):
            formats.append({
                'format_note': 'Screen share stream',
                'url': str_or_none(data.get('shareMp4Url')),
                'width': int_or_none(traverse_obj(data, ('shareResolvtions', 0))),
                'height': int_or_none(traverse_obj(data, ('shareResolvtions', 1))),
                'format_id': str_or_none(traverse_obj(data, ('shareVideo', 'id'))),
                'ext': 'mp4',
                # Lower than the camera stream's preference of 0
                'preference': -1,
            })

        return {
            'id': video_id,
            'title': str_or_none(traverse_obj(data, ('meet', 'topic'))),
            'duration': int_or_none(data.get('duration')),
            'subtitles': subtitles,
            'formats': formats,
            'http_headers': {
                'Referer': base_url,
            },
        }