]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/zoom.py
[cleanup] Misc (#8338)
[yt-dlp.git] / yt_dlp / extractor / zoom.py
CommitLineData
3f0852e3
RSK
1from .common import InfoExtractor
2from ..utils import (
81acad12 3 ExtractorError,
3f0852e3 4 int_or_none,
a3eb987e 5 str_or_none,
1418a043 6 js_to_json,
81acad12 7 parse_filesize,
79c77e85 8 traverse_obj,
1418a043 9 urlencode_postdata,
eb0f9d68 10 urljoin,
3f0852e3
RSK
11)
12
13
class ZoomIE(InfoExtractor):
    """Extractor for Zoom recording pages (``play`` and ``share`` URLs)."""

    IE_NAME = 'zoom'
    _VALID_URL = r'(?P<base_url>https?://(?:[^.]+\.)?zoom\.us/)rec(?:ording)?/(?P<type>play|share)/(?P<id>[\w.-]+)'
    _TESTS = [{
        'url': 'https://economist.zoom.us/rec/play/dUk_CNBETmZ5VA2BwEl-jjakPpJ3M1pcfVYAPRsoIbEByGsLjUZtaa4yCATQuOL3der8BlTwxQePl_j0.EImBkXzTIaPvdZO5',
        'md5': 'ab445e8c911fddc4f9adc842c2c5d434',
        'info_dict': {
            'id': 'dUk_CNBETmZ5VA2BwEl-jjakPpJ3M1pcfVYAPRsoIbEByGsLjUZtaa4yCATQuOL3der8BlTwxQePl_j0.EImBkXzTIaPvdZO5',
            'ext': 'mp4',
            'title': 'China\'s "two sessions" and the new five-year plan',
        },
        'skip': 'Recording requires email authentication to access',
    }, {
        # play URL
        'url': 'https://ffgolf.zoom.us/rec/play/qhEhXbrxq1Zoucx8CMtHzq1Z_2YZRPVCqWK_K-2FkEGRsSLDeOX8Tu4P6jtjZcRry8QhIbvKZdtr4UNo.QcPn2debFskI9whJ',
        'md5': '2c4b1c4e5213ebf9db293e88d9385bee',
        'info_dict': {
            'id': 'qhEhXbrxq1Zoucx8CMtHzq1Z_2YZRPVCqWK_K-2FkEGRsSLDeOX8Tu4P6jtjZcRry8QhIbvKZdtr4UNo.QcPn2debFskI9whJ',
            'ext': 'mp4',
            'title': 'Prépa AF2023 - Séance 5 du 11 avril - R20/VM/GO',
        },
    }, {
        # share URL
        'url': 'https://us02web.zoom.us/rec/share/hkUk5Zxcga0nkyNGhVCRfzkA2gX_mzgS3LpTxEEWJz9Y_QpIQ4mZFOUx7KZRZDQA.9LGQBdqmDAYgiZ_8',
        'md5': '90fdc7cfcaee5d52d1c817fc03c43c9b',
        'info_dict': {
            'id': 'hkUk5Zxcga0nkyNGhVCRfzkA2gX_mzgS3LpTxEEWJz9Y_QpIQ4mZFOUx7KZRZDQA.9LGQBdqmDAYgiZ_8',
            'ext': 'mp4',
            'title': 'Timea Andrea Lelik\'s Personal Meeting Room',
        },
    }]

    def _get_page_data(self, webpage, video_id):
        """Return the object assigned to ``window.__data__`` in *webpage*.

        The assignment is a JavaScript literal, so it is converted with
        ``js_to_json`` before being parsed.
        """
        return self._search_json(
            r'window\.__data__\s*=', webpage, 'data', video_id, transform_source=js_to_json)

    def _get_real_webpage(self, url, base_url, video_id, url_type):
        """Download *url*, submitting the passcode first when the page asks for one.

        If the downloaded page has no ``password_form``, it is returned as-is.
        Otherwise the passcode from the ``videopassword`` param is sent to the
        validation endpoint and the page is downloaded a second time.

        *url_type* is only used in the progress notes.

        Raises ExtractorError if a passcode is required but not supplied, or
        if the validation response does not report success.
        """
        webpage = self._download_webpage(url, video_id, note=f'Downloading {url_type} webpage')
        try:
            form = self._form_hidden_inputs('password_form', webpage)
        except ExtractorError:
            # No passcode form on the page: nothing to unlock
            return webpage

        password = self.get_param('videopassword')
        if not password:
            raise ExtractorError(
                'This video is protected by a passcode, use the --video-password option', expected=True)

        # The form says which kind of passcode it wants; that choice selects
        # both the validation endpoint and the hidden input holding the ID
        is_meeting = form.get('useWhichPasswd') == 'meeting'
        endpoint_suffix = '_meet' if is_meeting else ''
        id_field = 'meetId' if is_meeting else 'fileId'

        validation = self._download_json(
            f'{base_url}rec/validate{endpoint_suffix}_passwd',
            video_id, 'Validating passcode', 'Wrong passcode', data=urlencode_postdata({
                'id': form[id_field],
                'passwd': password,
                'action': form.get('action'),
            }))
        if not validation.get('status'):
            # Fall back to a generic message instead of raising KeyError when
            # the response carries no 'errorMessage'
            raise ExtractorError(
                validation.get('errorMessage') or 'Passcode validation failed', expected=True)
        return self._download_webpage(url, video_id, note=f'Re-downloading {url_type} webpage')

    def _real_extract(self, url):
        """Build the info dict (formats, subtitles, metadata) for a recording URL.

        ``share`` URLs are first resolved to their ``play`` URL via the
        share-info endpoint; the play page then yields the file ID used to
        fetch the play info JSON.

        Raises ExtractorError if the meeting ID or file ID cannot be found in
        the page data.
        """
        base_url, url_type, video_id = self._match_valid_url(url).group('base_url', 'type', 'id')

        if url_type == 'share':
            webpage = self._get_real_webpage(url, base_url, video_id, 'share')
            meeting_id = self._get_page_data(webpage, video_id).get('meetingId')
            if not meeting_id:
                raise ExtractorError('Unable to extract meeting ID')
            redirect_path = self._download_json(
                f'{base_url}nws/recording/1.0/play/share-info/{meeting_id}',
                video_id, note='Downloading share info JSON')['result']['redirectUrl']
            url = urljoin(base_url, redirect_path)

        webpage = self._get_real_webpage(url, base_url, video_id, 'play')
        # .get() so that a missing key is reported the same way as an empty one
        file_id = self._get_page_data(webpage, video_id).get('fileId')
        if not file_id:
            # When things go wrong, file_id can be empty string
            raise ExtractorError('Unable to extract file ID')

        data = self._download_json(
            f'{base_url}nws/recording/1.0/play/info/{file_id}', video_id,
            note='Downloading play info JSON')['result']

        subtitles = {}
        for _type in ('transcript', 'cc', 'chapter'):
            track_url = data.get(f'{_type}Url')
            if track_url:
                subtitles[_type] = [{
                    'url': urljoin(base_url, track_url),
                    'ext': 'vtt',
                }]

        formats = []

        # NOTE(review): the '...Resolvtions' spelling is kept exactly as the
        # original code had it; presumably it mirrors the API response keys
        if data.get('viewMp4Url'):
            formats.append({
                'format_note': 'Camera stream',
                'url': str_or_none(data.get('viewMp4Url')),
                'width': int_or_none(traverse_obj(data, ('viewResolvtions', 0))),
                'height': int_or_none(traverse_obj(data, ('viewResolvtions', 1))),
                'format_id': str_or_none(traverse_obj(data, ('recording', 'id'))),
                'ext': 'mp4',
                'filesize_approx': parse_filesize(str_or_none(traverse_obj(data, ('recording', 'fileSizeInMB')))),
                'preference': 0,
            })

        if data.get('shareMp4Url'):
            formats.append({
                'format_note': 'Screen share stream',
                'url': str_or_none(data.get('shareMp4Url')),
                'width': int_or_none(traverse_obj(data, ('shareResolvtions', 0))),
                'height': int_or_none(traverse_obj(data, ('shareResolvtions', 1))),
                'format_id': str_or_none(traverse_obj(data, ('shareVideo', 'id'))),
                'ext': 'mp4',
                # Ranked below the camera stream (preference 0)
                'preference': -1,
            })

        return {
            'id': video_id,
            'title': str_or_none(traverse_obj(data, ('meet', 'topic'))),
            'duration': int_or_none(data.get('duration')),
            'subtitles': subtitles,
            'formats': formats,
            'http_headers': {
                'Referer': base_url,
            },
        }