]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/zoom.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / zoom.py
1 from .common import InfoExtractor
2 from ..utils import (
3 ExtractorError,
4 int_or_none,
5 js_to_json,
6 parse_filesize,
7 parse_resolution,
8 str_or_none,
9 traverse_obj,
10 url_basename,
11 urlencode_postdata,
12 urljoin,
13 )
14
15
class ZoomIE(InfoExtractor):
    """Extractor for Zoom recordings addressed through ``play`` or ``share`` URLs."""

    IE_NAME = 'zoom'
    _VALID_URL = r'(?P<base_url>https?://(?:[^.]+\.)?zoom\.us/)rec(?:ording)?/(?P<type>play|share)/(?P<id>[\w.-]+)'
    _TESTS = [{
        'url': 'https://economist.zoom.us/rec/play/dUk_CNBETmZ5VA2BwEl-jjakPpJ3M1pcfVYAPRsoIbEByGsLjUZtaa4yCATQuOL3der8BlTwxQePl_j0.EImBkXzTIaPvdZO5',
        'md5': 'ab445e8c911fddc4f9adc842c2c5d434',
        'info_dict': {
            'id': 'dUk_CNBETmZ5VA2BwEl-jjakPpJ3M1pcfVYAPRsoIbEByGsLjUZtaa4yCATQuOL3der8BlTwxQePl_j0.EImBkXzTIaPvdZO5',
            'ext': 'mp4',
            'title': 'China\'s "two sessions" and the new five-year plan',
        },
        'skip': 'Recording requires email authentication to access',
    }, {
        # Recording addressed directly by its play URL
        'url': 'https://ffgolf.zoom.us/rec/play/qhEhXbrxq1Zoucx8CMtHzq1Z_2YZRPVCqWK_K-2FkEGRsSLDeOX8Tu4P6jtjZcRry8QhIbvKZdtr4UNo.QcPn2debFskI9whJ',
        'md5': '2c4b1c4e5213ebf9db293e88d9385bee',
        'info_dict': {
            'id': 'qhEhXbrxq1Zoucx8CMtHzq1Z_2YZRPVCqWK_K-2FkEGRsSLDeOX8Tu4P6jtjZcRry8QhIbvKZdtr4UNo.QcPn2debFskI9whJ',
            'ext': 'mp4',
            'title': 'Prépa AF2023 - Séance 5 du 11 avril - R20/VM/GO',
        },
    }, {
        # Recording addressed by a share URL
        'url': 'https://us02web.zoom.us/rec/share/hkUk5Zxcga0nkyNGhVCRfzkA2gX_mzgS3LpTxEEWJz9Y_QpIQ4mZFOUx7KZRZDQA.9LGQBdqmDAYgiZ_8',
        'md5': '90fdc7cfcaee5d52d1c817fc03c43c9b',
        'info_dict': {
            'id': 'hkUk5Zxcga0nkyNGhVCRfzkA2gX_mzgS3LpTxEEWJz9Y_QpIQ4mZFOUx7KZRZDQA.9LGQBdqmDAYgiZ_8',
            'ext': 'mp4',
            'title': 'Timea Andrea Lelik\'s Personal Meeting Room',
        },
        'skip': 'This recording has expired',
    }, {
        # Share URL whose recording offers a view_with_share stream
        'url': 'https://cityofdetroit.zoom.us/rec/share/VjE-5kW3xmgbEYqR5KzRgZ1OFZvtMtiXk5HyRJo5kK4m5PYE6RF4rF_oiiO_9qaM.UTAg1MI7JSnF3ZjX',
        'md5': 'bdc7867a5934c151957fb81321b3c024',
        'info_dict': {
            'id': 'VjE-5kW3xmgbEYqR5KzRgZ1OFZvtMtiXk5HyRJo5kK4m5PYE6RF4rF_oiiO_9qaM.UTAg1MI7JSnF3ZjX',
            'ext': 'mp4',
            'title': 'February 2022 Detroit Revenue Estimating Conference',
            'duration': 7299,
            'formats': 'mincount:3',
        },
    }]

    def _get_page_data(self, webpage, video_id):
        """Return the JSON object assigned to ``window.__data__`` in *webpage*.

        The matched source is passed through ``js_to_json`` before parsing.
        """
        page_data = self._search_json(
            r'window\.__data__\s*=', webpage, 'data', video_id, transform_source=js_to_json)
        return page_data

    def _get_real_webpage(self, url, base_url, video_id, url_type):
        """Download *url* and, when a passcode form is present, validate the passcode first.

        *url_type* is only used to label the download notes. If the hidden
        inputs of ``password_form`` cannot be extracted, the downloaded page is
        returned unchanged. Otherwise the ``videopassword`` parameter is posted
        to the validation endpoint under *base_url* and the page is fetched again.

        Raises ExtractorError (expected) when no passcode was supplied or when
        the validation response carries a falsy ``status``.
        """
        page = self._download_webpage(url, video_id, note=f'Downloading {url_type} webpage')
        try:
            hidden_inputs = self._form_hidden_inputs('password_form', page)
        except ExtractorError:
            # The passcode form inputs could not be read: use the page as downloaded
            return page

        passcode = self.get_param('videopassword')
        if not passcode:
            raise ExtractorError(
                'This video is protected by a passcode, use the --video-password option', expected=True)

        # The form states which kind of passcode applies; that choice selects
        # both the validation endpoint and the hidden input holding the ID
        is_meeting = hidden_inputs.get('useWhichPasswd') == 'meeting'
        endpoint_suffix = '_meet' if is_meeting else ''
        id_field = ('meet' if is_meeting else 'file') + 'Id'
        payload = urlencode_postdata({
            'id': hidden_inputs[id_field],
            'passwd': passcode,
            'action': hidden_inputs.get('action'),
        })
        result = self._download_json(
            base_url + 'rec/validate%s_passwd' % endpoint_suffix,
            video_id, 'Validating passcode', 'Wrong passcode', data=payload)
        if result.get('status'):
            return self._download_webpage(url, video_id, note=f'Re-downloading {url_type} webpage')
        raise ExtractorError(result['errorMessage'], expected=True)

    def _real_extract(self, url):
        """Build the info dict (id, title, duration, subtitles, formats, headers) for *url*.

        A ``share`` URL is first resolved to its play URL through the share-info
        endpoint; the play page then yields the file ID used to query the play
        info endpoint.

        Raises ExtractorError when the play page carries an empty file ID.
        """
        mobj = self._match_valid_url(url)
        base_url, url_type, video_id = mobj.group('base_url', 'type', 'id')
        query = {}

        if url_type == 'share':
            share_page = self._get_real_webpage(url, base_url, video_id, 'share')
            meeting_id = self._get_page_data(share_page, video_id)['meetingId']
            share_info = self._download_json(
                f'{base_url}nws/recording/1.0/play/share-info/{meeting_id}',
                video_id, note='Downloading share info JSON')
            url = urljoin(base_url, share_info['result']['redirectUrl'])
            query['continueMode'] = 'true'

        play_page = self._get_real_webpage(url, base_url, video_id, 'play')
        file_id = self._get_page_data(play_page, video_id)['fileId']
        if not file_id:
            # When things go wrong, file_id can be empty string
            raise ExtractorError('Unable to extract file ID')

        play_info = self._download_json(
            f'{base_url}nws/recording/1.0/play/info/{file_id}', video_id, query=query,
            note='Downloading play info JSON')
        data = play_info['result']

        # One subtitle entry per track whose '<track>Url' field is non-empty
        subtitles = {}
        for track in ('transcript', 'cc', 'chapter'):
            track_url = data.get('%sUrl' % track)
            if not track_url:
                continue
            subtitles[track] = [{
                'url': urljoin(base_url, track_url),
                'ext': 'vtt',
            }]

        formats = []

        # NOTE(review): the '...Resolvtions' key spelling is kept exactly as found;
        # presumably it mirrors the response schema - confirm before "fixing" it
        camera_url = data.get('viewMp4Url')
        if camera_url:
            camera_format = {
                'format_note': 'Camera stream',
                'url': camera_url,
                'width': int_or_none(traverse_obj(data, ('viewResolvtions', 0))),
                'height': int_or_none(traverse_obj(data, ('viewResolvtions', 1))),
                'format_id': 'view',
                'ext': 'mp4',
                'filesize_approx': parse_filesize(str_or_none(traverse_obj(data, ('recording', 'fileSizeInMB')))),
                'preference': 0,
            }
            formats.append(camera_format)

        screen_url = data.get('shareMp4Url')
        if screen_url:
            screen_format = {
                'format_note': 'Screen share stream',
                'url': screen_url,
                'width': int_or_none(traverse_obj(data, ('shareResolvtions', 0))),
                'height': int_or_none(traverse_obj(data, ('shareResolvtions', 1))),
                'format_id': 'share',
                'ext': 'mp4',
                'preference': -1,
            }
            formats.append(screen_format)

        view_with_share_url = data.get('viewMp4WithshareUrl')
        if view_with_share_url:
            # The resolution, when available, is read from a '_<W>x<H>.mp4' file name suffix
            resolution = self._search_regex(
                r'_(\d+x\d+)\.mp4', url_basename(view_with_share_url), 'resolution', default=None)
            combined_format = {
                **parse_resolution(resolution),
                'format_note': 'Screen share with camera',
                'url': view_with_share_url,
                'format_id': 'view_with_share',
                'ext': 'mp4',
                'preference': 1,
            }
            formats.append(combined_format)

        info = {
            'id': video_id,
            'title': str_or_none(traverse_obj(data, ('meet', 'topic'))),
            'duration': int_or_none(data.get('duration')),
            'subtitles': subtitles,
            'formats': formats,
            'http_headers': {
                'Referer': base_url,
            },
        }
        return info