]>
Commit | Line | Data |
---|---|---|
5014558a | 1 | from .common import InfoExtractor |
2 | from ..compat import compat_str | |
3 | from ..utils import ( | |
4 | int_or_none, | |
5 | try_get, | |
6 | unified_timestamp, | |
7 | ) | |
8 | ||
9 | ||
class ParlviewIE(InfoExtractor):
    """Information extractor for videos served from parlview.aph.gov.au."""

    _VALID_URL = r'https?://(?:www\.)?parlview\.aph\.gov\.au/(?:[^/]+)?\bvideoID=(?P<id>\d{6})'
    _TESTS = [{
        'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=542661',
        'info_dict': {
            'id': '542661',
            'ext': 'mp4',
            'title': "Australia's Family Law System [Part 2]",
            'duration': 5799,
            'description': 'md5:7099883b391619dbae435891ca871a62',
            'timestamp': 1621430700,
            'upload_date': '20210519',
            'uploader': 'Joint Committee',
        },
        'params': {
            'skip_download': True,
        }
    }, {
        'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=539936',
        'only_matching': True,
    }]
    # JSON player configuration; its 'media' object carries renditions, duration and time map.
    _API_URL = 'https://parlview.aph.gov.au/api_v3/1/playback/getUniversalPlayerConfig?videoID=%s&format=json'
    # HTML fragment that is searched for the "Channel:" (uploader) cell.
    _MEDIA_INFO_URL = 'https://parlview.aph.gov.au/ajaxPlayer.php?videoID=%s&tabNum=4&action=loadTab'

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        Downloads the player page, the JSON player configuration and
        (best-effort) the media-info tab, then combines them.

        Only the first rendition is considered, and it must be of stream
        type 'VOD'; otherwise ``raise_no_formats`` is invoked.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Fall back to an empty dict so the `.get()` lookups below cannot hit
        # an AttributeError when the response carries no 'media' object.
        media = self._download_json(self._API_URL % video_id, video_id).get('media') or {}

        # Expected shape appears to be '<prefix>/<date>_<time>'; the '/' default
        # yields an empty date string when the offset is absent.
        timestamp = try_get(media, lambda x: x['timeMap']['source']['timecode_offsets'][0], compat_str) or '/'

        stream = try_get(media, lambda x: x['renditions'][0], dict)
        if not stream:
            self.raise_no_formats('No streams were detected')
        elif stream.get('streamType') != 'VOD':
            self.raise_no_formats('Unknown type of stream was detected: "%s"' % str(stream.get('streamType')))
        formats = self._extract_m3u8_formats(stream['url'], video_id, 'mp4', 'm3u8_native')

        # This request is optional (fatal=False), so a failure may hand back a
        # non-string falsy value. Normalise it to '' so the uploader regex below
        # simply finds nothing instead of raising on a non-string argument.
        media_info = self._download_webpage(
            self._MEDIA_INFO_URL % video_id, video_id, note='Downloading media info', fatal=False) or ''

        # partition() never raises: when the offset holds no '/', the date part
        # is just '' (same as the default path) rather than an IndexError from
        # split(...)[1].
        date_string = timestamp.partition('/')[2].replace('_', ' ')

        return {
            'id': video_id,
            'url': url,
            'title': self._html_search_regex(r'<h2>([^<]+)<', webpage, 'title', fatal=False),
            'formats': formats,
            'duration': int_or_none(media.get('duration')),
            'timestamp': unified_timestamp(date_string),
            'description': self._html_search_regex(
                r'<div[^>]+class="descripti?on"[^>]*>[^>]+<strong>[^>]+>[^>]+>([^<]+)',
                webpage, 'description', fatal=False),
            'uploader': self._html_search_regex(
                r'<td>[^>]+>Channel:[^>]+>([^<]+)', media_info, 'channel', fatal=False),
            'thumbnail': media.get('staticImage'),
        }