]>
Commit | Line | Data |
---|---|---|
ff349ff9 DV |
1 | import json |
2 | import urllib.parse | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import determine_ext, int_or_none, url_or_none | |
6 | from ..utils.traversal import traverse_obj | |
7 | ||
8 | ||
class SharePointIE(InfoExtractor):
    """Extract videos from ``*.sharepoint.com`` ``:v:`` share links and ``stream.aspx`` pages."""

    _BASE_URL_RE = r'https?://[\w-]+\.sharepoint\.com/'
    _VALID_URL = [
        # Share links: the ID is the final 46-character path segment
        rf'{_BASE_URL_RE}:v:/[a-z]/(?:[^/?#]+/)*(?P<id>[^/?#]{{46}})/?(?:$|[?#])',
        # Player pages: the ID is taken from the `id` query parameter
        rf'{_BASE_URL_RE}(?!:v:)(?:[^/?#]+/)*stream\.aspx\?(?:[^#]+&)?id=(?P<id>[^&#]+)',
    ]
    _TESTS = [{
        'url': 'https://lut-my.sharepoint.com/:v:/g/personal/juha_eerola_student_lab_fi/EUrAmrktb4ZMhUcY9J2PqMEBD_9x_l0DyYWVgAvp-TTOMw?e=ZpQOOw',
        'md5': '2950821d0d4937a0a76373782093b435',
        'info_dict': {
            'id': '01EQRS7EKKYCNLSLLPQZGIKRYY6SOY7KGB',
            'display_id': 'EUrAmrktb4ZMhUcY9J2PqMEBD_9x_l0DyYWVgAvp-TTOMw',
            'ext': 'mp4',
            'title': 'CmvpJST',
            'duration': 54.567,
            'thumbnail': r're:https://.+/thumbnail',
            'uploader_id': '8dcec565-a956-4b91-95e5-bacfb8bc015f',
        },
    }, {
        'url': 'https://greaternyace.sharepoint.com/:v:/s/acementornydrive/ETski5eAfNVEoPRZUAyy1wEBpLgVFYWso5bjbZjfBLlPUg?e=PQUfVb',
        'md5': 'c496a01644223273bff12e93e501afd1',
        'info_dict': {
            'id': '01QI4AVTZ3ESFZPAD42VCKB5CZKAGLFVYB',
            'display_id': 'ETski5eAfNVEoPRZUAyy1wEBpLgVFYWso5bjbZjfBLlPUg',
            'ext': 'mp4',
            'title': '930103681233985536',
            'duration': 3797.326,
            'thumbnail': r're:https://.+/thumbnail',
        },
    }, {
        'url': 'https://lut-my.sharepoint.com/personal/juha_eerola_student_lab_fi/_layouts/15/stream.aspx?id=%2Fpersonal%2Fjuha_eerola_student_lab_fi%2FDocuments%2FM-DL%2FCmvpJST.mp4&ga=1&referrer=StreamWebApp.Web&referrerScenario=AddressBarCopied.view',
        'info_dict': {
            'id': '01EQRS7EKKYCNLSLLPQZGIKRYY6SOY7KGB',
            'display_id': '/personal/juha_eerola_student_lab_fi/Documents/M-DL/CmvpJST.mp4',
            'ext': 'mp4',
            'title': 'CmvpJST',
            'duration': 54.567,
            'thumbnail': r're:https://.+/thumbnail',
            'uploader_id': '8dcec565-a956-4b91-95e5-bacfb8bc015f',
        },
        'skip': 'Session cookies needed',
    }, {
        'url': 'https://izoobasisschool.sharepoint.com/:v:/g/Eaqleq8COVBIvIPvod0U27oBypC6aWOkk8ptuDpmJ6arHw',
        'only_matching': True,
    }, {
        'url': 'https://uskudaredutr-my.sharepoint.com/:v:/g/personal/songul_turkaydin_uskudar_edu_tr/EbTf-VRUIbtGuIN73tx1MuwBCHBOmNcWNqSLw61Fd2_o0g?e=n5Vkof',
        'only_matching': True,
    }, {
        'url': 'https://epam-my.sharepoint.com/:v:/p/dzmitry_tamashevich/Ec4ZOs-rATZHjFYZWVxjczEB649FCoYFKDV_x3RxZiWAGA?e=4hswgA',
        'only_matching': True,
    }, {
        'url': 'https://microsoft.sharepoint.com/:v:/t/MicrosoftSPARKRecordings-MSFTInternal/EWCyeqByVWBAt8wDvNZdV-UB0BvU5YVbKm0UHgdrUlI6dg?e=QbPck6',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a SharePoint video page.

        The page's embedded ``g_fileInfo`` JSON supplies the video ID, the
        ``.transformUrl`` from which the ``videomanifest`` URL is derived, and
        the metadata fields. DASH and HLS formats are requested from that
        manifest URL; the file behind ``downloadUrl`` is added as the
        ``source`` format when it is a valid URL.

        Raises a login-required error when the request ends up on
        ``login.microsoftonline.com``.
        """
        display_id = urllib.parse.unquote(self._match_id(url))
        webpage, urlh = self._download_webpage_handle(url, display_id)

        # Landing on the Microsoft login host means the page needs a session
        landed_on = urllib.parse.urlparse(urlh.url).hostname
        if landed_on == 'login.microsoftonline.com':
            self.raise_login_required(
                'Session cookies are required for this URL and can be passed '
                'with the --cookies option. The --cookies-from-browser option will not work',
                method=None)

        file_info = self._search_json(r'g_fileInfo\s*=', webpage, 'player config', display_id)
        video_id = file_info['VroomItemId']

        # Swap the last path segment of the transform URL for `videomanifest`
        # and extend its existing query with the manifest parameters
        transform = urllib.parse.urlparse(file_info['.transformUrl'])
        manifest_path = urllib.parse.urljoin(f'{transform.path}/', '../videomanifest')
        manifest_params = urllib.parse.parse_qs(transform.query)
        manifest_params.update({
            'cTag': file_info['.ctag'],
            'action': 'Access',
            'part': 'index',
        })
        base_media_url = urllib.parse.urlunparse(transform._replace(
            path=manifest_path,
            query=urllib.parse.urlencode(manifest_params, doseq=True)))

        # The web player sends extra parameters with the format URLs, but
        # every format is still returned when they are left out
        formats = self._extract_mpd_formats(
            base_media_url, video_id, mpd_id='dash', query={'format': 'dash'}, fatal=False)
        for hls_type in ('hls', 'hls-vnext'):
            # HLS variants get quality=-2, below the source file's quality of 1
            formats += self._extract_m3u8_formats(
                base_media_url, video_id, 'mp4', m3u8_id=hls_type,
                query={'format': hls_type}, fatal=False, quality=-2)

        source_url = traverse_obj(file_info, ('downloadUrl', {url_or_none}))
        if source_url:
            ext_hint = file_info.get('extension') or file_info.get('name')
            audio_only = file_info.get('isAudio') is True
            formats.append({
                'url': source_url,
                'ext': determine_ext(ext_hint),
                'quality': 1,
                'format_id': 'source',
                'filesize': int_or_none(file_info.get('size')),
                'vcodec': 'none' if audio_only else None,
            })

        # `MediaServiceFastMetadata` is a JSON string; its duration is scaled
        # down by 10,000,000 (presumably 100 ns ticks to seconds, TODO confirm)
        duration = traverse_obj(file_info, (
            'MediaServiceFastMetadata', {json.loads}, 'media', 'duration', {lambda x: x / 10000000}))

        return {
            'id': video_id,
            'formats': formats,
            'title': file_info.get('title') or file_info.get('displayName'),
            'display_id': display_id,
            'uploader_id': file_info.get('authorId'),
            'duration': duration,
            'thumbnail': url_or_none(file_info.get('thumbnailUrl')),
        }