]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/zoom.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / zoom.py
CommitLineData
3f0852e3
RSK
1from .common import InfoExtractor
2from ..utils import (
81acad12 3 ExtractorError,
3f0852e3 4 int_or_none,
1418a043 5 js_to_json,
81acad12 6 parse_filesize,
3906de07
T
7 parse_resolution,
8 str_or_none,
79c77e85 9 traverse_obj,
3906de07 10 url_basename,
1418a043 11 urlencode_postdata,
eb0f9d68 12 urljoin,
3f0852e3
RSK
13)
14
15
class ZoomIE(InfoExtractor):
    """Extractor for Zoom recordings addressed by ``rec/play`` or ``rec/share`` URLs.

    ``share`` URLs are first resolved to their ``play`` URL; the play page then
    yields a file ID which is used to fetch the recording's play info JSON.
    """

    IE_NAME = 'zoom'
    _VALID_URL = r'(?P<base_url>https?://(?:[^.]+\.)?zoom\.us/)rec(?:ording)?/(?P<type>play|share)/(?P<id>[\w.-]+)'
    _TESTS = [{
        'url': 'https://economist.zoom.us/rec/play/dUk_CNBETmZ5VA2BwEl-jjakPpJ3M1pcfVYAPRsoIbEByGsLjUZtaa4yCATQuOL3der8BlTwxQePl_j0.EImBkXzTIaPvdZO5',
        'md5': 'ab445e8c911fddc4f9adc842c2c5d434',
        'info_dict': {
            'id': 'dUk_CNBETmZ5VA2BwEl-jjakPpJ3M1pcfVYAPRsoIbEByGsLjUZtaa4yCATQuOL3der8BlTwxQePl_j0.EImBkXzTIaPvdZO5',
            'ext': 'mp4',
            'title': 'China\'s "two sessions" and the new five-year plan',
        },
        'skip': 'Recording requires email authentication to access',
    }, {
        # play URL
        'url': 'https://ffgolf.zoom.us/rec/play/qhEhXbrxq1Zoucx8CMtHzq1Z_2YZRPVCqWK_K-2FkEGRsSLDeOX8Tu4P6jtjZcRry8QhIbvKZdtr4UNo.QcPn2debFskI9whJ',
        'md5': '2c4b1c4e5213ebf9db293e88d9385bee',
        'info_dict': {
            'id': 'qhEhXbrxq1Zoucx8CMtHzq1Z_2YZRPVCqWK_K-2FkEGRsSLDeOX8Tu4P6jtjZcRry8QhIbvKZdtr4UNo.QcPn2debFskI9whJ',
            'ext': 'mp4',
            'title': 'Prépa AF2023 - Séance 5 du 11 avril - R20/VM/GO',
        },
    }, {
        # share URL
        'url': 'https://us02web.zoom.us/rec/share/hkUk5Zxcga0nkyNGhVCRfzkA2gX_mzgS3LpTxEEWJz9Y_QpIQ4mZFOUx7KZRZDQA.9LGQBdqmDAYgiZ_8',
        'md5': '90fdc7cfcaee5d52d1c817fc03c43c9b',
        'info_dict': {
            'id': 'hkUk5Zxcga0nkyNGhVCRfzkA2gX_mzgS3LpTxEEWJz9Y_QpIQ4mZFOUx7KZRZDQA.9LGQBdqmDAYgiZ_8',
            'ext': 'mp4',
            'title': 'Timea Andrea Lelik\'s Personal Meeting Room',
        },
        'skip': 'This recording has expired',
    }, {
        # view_with_share URL
        'url': 'https://cityofdetroit.zoom.us/rec/share/VjE-5kW3xmgbEYqR5KzRgZ1OFZvtMtiXk5HyRJo5kK4m5PYE6RF4rF_oiiO_9qaM.UTAg1MI7JSnF3ZjX',
        'md5': 'bdc7867a5934c151957fb81321b3c024',
        'info_dict': {
            'id': 'VjE-5kW3xmgbEYqR5KzRgZ1OFZvtMtiXk5HyRJo5kK4m5PYE6RF4rF_oiiO_9qaM.UTAg1MI7JSnF3ZjX',
            'ext': 'mp4',
            'title': 'February 2022 Detroit Revenue Estimating Conference',
            'duration': 7299,
            'formats': 'mincount:3',
        },
    }]

    def _get_page_data(self, webpage, video_id):
        """Return the object assigned to ``window.__data__`` in *webpage*, parsed as JSON.

        The assignment is a JavaScript literal, so it is passed through
        ``js_to_json`` before parsing.
        """
        return self._search_json(
            r'window\.__data__\s*=', webpage, 'data', video_id, transform_source=js_to_json)

    def _get_real_webpage(self, url, base_url, video_id, url_type):
        """Download *url* and, if it carries a passcode form, validate the passcode first.

        When the hidden inputs of ``password_form`` cannot be read, the page is
        returned as downloaded. Otherwise the ``videopassword`` parameter is
        posted to the validation endpoint and the page is downloaded again.

        Raises ExtractorError (expected) when no passcode was supplied or when
        the validation response reports failure.
        """
        webpage = self._download_webpage(url, video_id, note=f'Downloading {url_type} webpage')
        try:
            form = self._form_hidden_inputs('password_form', webpage)
        except ExtractorError:
            # No readable passcode form: use the page as-is
            return webpage

        password = self.get_param('videopassword')
        if not password:
            raise ExtractorError(
                'This video is protected by a passcode, use the --video-password option', expected=True)

        # The form states which passcode is being asked for; this selects both
        # the validation endpoint and the name of the hidden ID field to send.
        is_meeting = form.get('useWhichPasswd') == 'meeting'
        endpoint_suffix = '_meet' if is_meeting else ''
        id_field = 'meetId' if is_meeting else 'fileId'

        validation = self._download_json(
            f'{base_url}rec/validate{endpoint_suffix}_passwd',
            video_id, 'Validating passcode', 'Wrong passcode', data=urlencode_postdata({
                'id': form[id_field],
                'passwd': password,
                'action': form.get('action'),
            }))
        if not validation.get('status'):
            # Fall back to a generic message so a response without
            # 'errorMessage' still produces an expected ExtractorError
            # rather than a KeyError.
            raise ExtractorError(
                validation.get('errorMessage') or 'Passcode validation failed', expected=True)
        return self._download_webpage(url, video_id, note=f'Re-downloading {url_type} webpage')

    def _real_extract(self, url):
        """Build the info dict (formats, subtitles, title, duration) for a recording URL."""
        base_url, url_type, video_id = self._match_valid_url(url).group('base_url', 'type', 'id')
        query = {}

        if url_type == 'share':
            # Resolve the share page to the play URL it redirects to
            webpage = self._get_real_webpage(url, base_url, video_id, 'share')
            meeting_id = self._get_page_data(webpage, video_id)['meetingId']
            redirect_path = self._download_json(
                f'{base_url}nws/recording/1.0/play/share-info/{meeting_id}',
                video_id, note='Downloading share info JSON')['result']['redirectUrl']
            url = urljoin(base_url, redirect_path)
            query['continueMode'] = 'true'

        webpage = self._get_real_webpage(url, base_url, video_id, 'play')
        file_id = self._get_page_data(webpage, video_id)['fileId']
        if not file_id:
            # When things go wrong, file_id can be empty string
            raise ExtractorError('Unable to extract file ID')

        data = self._download_json(
            f'{base_url}nws/recording/1.0/play/info/{file_id}', video_id, query=query,
            note='Downloading play info JSON')['result']

        # Each available text track is exposed under '<kind>Url' and offered as VTT
        subtitles = {}
        for track_kind in ('transcript', 'cc', 'chapter'):
            track_path = data.get(f'{track_kind}Url')
            if track_path:
                subtitles[track_kind] = [{
                    'url': urljoin(base_url, track_path),
                    'ext': 'vtt',
                }]

        formats = []

        if data.get('viewMp4Url'):
            formats.append({
                'format_note': 'Camera stream',
                'url': data['viewMp4Url'],
                # NOTE(review): 'Resolvtions' is the key spelling this code reads;
                # presumably it mirrors the API response - do not "correct" it
                'width': int_or_none(traverse_obj(data, ('viewResolvtions', 0))),
                'height': int_or_none(traverse_obj(data, ('viewResolvtions', 1))),
                'format_id': 'view',
                'ext': 'mp4',
                'filesize_approx': parse_filesize(str_or_none(traverse_obj(data, ('recording', 'fileSizeInMB')))),
                'preference': 0,
            })

        if data.get('shareMp4Url'):
            formats.append({
                'format_note': 'Screen share stream',
                'url': data['shareMp4Url'],
                'width': int_or_none(traverse_obj(data, ('shareResolvtions', 0))),
                'height': int_or_none(traverse_obj(data, ('shareResolvtions', 1))),
                'format_id': 'share',
                'ext': 'mp4',
                'preference': -1,
            })

        view_with_share_url = data.get('viewMp4WithshareUrl')
        if view_with_share_url:
            # No resolution fields are read for this stream; take WxH from the
            # file name when it ends in '_<W>x<H>.mp4'
            resolution = self._search_regex(
                r'_(\d+x\d+)\.mp4', url_basename(view_with_share_url), 'resolution', default=None)
            formats.append({
                **parse_resolution(resolution),
                'format_note': 'Screen share with camera',
                'url': view_with_share_url,
                'format_id': 'view_with_share',
                'ext': 'mp4',
                'preference': 1,
            })

        return {
            'id': video_id,
            'title': str_or_none(traverse_obj(data, ('meet', 'topic'))),
            'duration': int_or_none(data.get('duration')),
            'subtitles': subtitles,
            'formats': formats,
            'http_headers': {
                'Referer': base_url,
            },
        }