]>
Commit | Line | Data |
---|---|---|
1 | import datetime as dt | |
2 | ||
3 | from .common import InfoExtractor | |
4 | from .redge import RedCDNLivxIE | |
5 | from ..utils import ( | |
6 | clean_html, | |
7 | join_nonempty, | |
8 | js_to_json, | |
9 | strip_or_none, | |
10 | update_url_query, | |
11 | ) | |
12 | from ..utils.traversal import traverse_obj | |
13 | ||
14 | ||
def is_dst(date):
    """Tell whether a naive datetime lies inside the summer-time window.

    The window runs from 02:00 on the last Sunday of March up to and
    including 03:00 on the last Sunday of October of ``date.year``.

    ``date`` has to be a naive ``datetime``: it is compared against naive
    boundaries, so an aware value raises ``TypeError``.
    """
    def _last_sunday(month):
        # Both March and October have 31 days. isoweekday() is 7 on Sunday,
        # so "% 7" yields 0 when the 31st itself is a Sunday, and otherwise
        # the number of days to step back to the preceding Sunday.
        month_end = dt.datetime(date.year, month, 31)
        return month_end - dt.timedelta(days=month_end.isoweekday() % 7)

    window_start = _last_sunday(3).replace(hour=2)
    window_end = _last_sunday(10).replace(hour=3)
    return window_start <= date <= window_end
21 | ||
22 | ||
def rfc3339_to_atende(date):
    """Convert an ISO-formatted datetime string to an integer millisecond offset.

    The string is parsed with ``datetime.fromisoformat``, moved forward by
    one hour when ``is_dst`` reports summer time, and then expressed as
    milliseconds elapsed since Unix time 978307200 (2001-01-01T00:00:00 UTC).

    NOTE(review): for a naive datetime ``timestamp()`` interprets the value in
    the machine's local timezone, so the result depends on where this runs --
    confirm that this is intended.
    """
    moment = dt.datetime.fromisoformat(date)
    if is_dst(moment):
        moment = moment + dt.timedelta(hours=1)
    seconds_since_reference = moment.timestamp() - 978307200
    return int(seconds_since_reference * 1000)
27 | ||
28 | ||
class SejmIE(InfoExtractor):
    """Extractor for transmissions on sejm.gov.pl and its sejm-embed.redcdn.pl video frame.

    The result is a playlist holding one entry per camera stream found in the
    embed frame, plus the sign language interpreter stream when the
    transmission metadata has a truthy ``mig`` parameter.
    """
    _VALID_URL = (
        r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp(?:\?[^#]*)?#(?P<id>[\dA-F]+)',
        r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp\?(?:[^#]+&)?unid=(?P<id>[\dA-F]+)',
        r'https?://sejm-embed\.redcdn\.pl/[Ss]ejm(?P<term>\d+)\.nsf/VideoFrame\.xsp/(?P<id>[\dA-F]+)',
    )
    IE_NAME = 'sejm'

    _TESTS = [{
        # multiple cameras, Polish sign language interpreter
        'url': 'https://www.sejm.gov.pl/Sejm10.nsf/transmisje_arch.xsp#6181EF1AD9CEEBB5C1258A6D006452B5',
        'info_dict': {
            'id': '6181EF1AD9CEEBB5C1258A6D006452B5',
            'title': '1. posiedzenie Sejmu X kadencji',
            'duration': 20145,
            'live_status': 'was_live',
            'location': 'Sala Posiedzeń',
        },
        'playlist': [{
            'info_dict': {
                'id': 'ENC01-722340000000-722360145000',
                'ext': 'mp4',
                'duration': 20145,
                'title': '1. posiedzenie Sejmu X kadencji - ENC01',
                'live_status': 'was_live',
            },
        }, {
            'info_dict': {
                'id': 'ENC30-722340000000-722360145000',
                'ext': 'mp4',
                'duration': 20145,
                'title': '1. posiedzenie Sejmu X kadencji - ENC30',
                'live_status': 'was_live',
            },
        }, {
            'info_dict': {
                'id': 'ENC31-722340000000-722360145000',
                'ext': 'mp4',
                'duration': 20145,
                'title': '1. posiedzenie Sejmu X kadencji - ENC31',
                'live_status': 'was_live',
            },
        }, {
            'info_dict': {
                'id': 'ENC32-722340000000-722360145000',
                'ext': 'mp4',
                'duration': 20145,
                'title': '1. posiedzenie Sejmu X kadencji - ENC32',
                'live_status': 'was_live',
            },
        }, {
            # sign language interpreter
            'info_dict': {
                'id': 'Migacz-ENC01-1-722340000000-722360145000',
                'ext': 'mp4',
                'duration': 20145,
                'title': '1. posiedzenie Sejmu X kadencji - Migacz-ENC01',
                'live_status': 'was_live',
            },
        }],
    }, {
        'url': 'https://www.sejm.gov.pl/Sejm8.nsf/transmisje.xsp?unid=9377A9D65518E9A5C125808E002E9FF2',
        'info_dict': {
            'id': '9377A9D65518E9A5C125808E002E9FF2',
            'title': 'Debata "Lepsza Polska: obywatelska"',
            'description': 'KP .Nowoczesna',
            'duration': 8770,
            'live_status': 'was_live',
            'location': 'sala kolumnowa im. Kazimierza Pużaka (bud. C-D)',
        },
        'playlist': [{
            'info_dict': {
                'id': 'ENC08-1-503831270000-503840040000',
                'ext': 'mp4',
                'duration': 8770,
                'title': 'Debata "Lepsza Polska: obywatelska" - ENC08',
                'live_status': 'was_live',
            },
        }],
    }, {
        # the 7th term is a special case: it does not use redcdn livx
        'url': 'https://www.sejm.gov.pl/sejm7.nsf/transmisje_arch.xsp?rok=2015&month=11#A6E6D475ECCC6FE5C1257EF90034817F',
        'info_dict': {
            'id': 'A6E6D475ECCC6FE5C1257EF90034817F',
            'title': 'Konferencja prasowa - Stanowisko SLD ws. składu nowego rządu',
            'description': 'SLD - Biuro Prasowe Klubu',
            'duration': 514,
            'location': 'sala 101/bud. C',
            'live_status': 'was_live',
        },
        'playlist': [{
            'info_dict': {
                'id': 'A6E6D475ECCC6FE5C1257EF90034817F',
                'ext': 'mp4',
                'title': 'Konferencja prasowa - Stanowisko SLD ws. składu nowego rządu',
                'duration': 514,
            },
        }],
    }, {
        'url': 'https://sejm-embed.redcdn.pl/Sejm10.nsf/VideoFrame.xsp/FED58EABB97FBD53C1258A7400386492',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a playlist of all streams belonging to one transmission.

        Downloads the embed frame and the JSON metadata for the term and id
        taken from ``url``; the camera list (and the optional sign language
        interpreter URL) is read from the frame, everything else from the JSON.
        """
        term, video_id = self._match_valid_url(url).group('term', 'id')

        frame_url = f'https://sejm-embed.redcdn.pl/Sejm{term}.nsf/VideoFrame.xsp/{video_id}'
        frame = self._download_webpage(frame_url, video_id)
        # The endpoint is named "transmisje_arch", yet it serves live streams as well.
        metadata_url = f'https://www.sejm.gov.pl/Sejm{term}.nsf/transmisje_arch.xsp/json/{video_id}'
        data = self._download_json(metadata_url, video_id)
        params = data['params']

        title = strip_or_none(data.get('title'))

        status = data.get('status')
        if status == 'VIDEO_ENDED':
            live_status = 'was_live'
        elif status == 'VIDEO_PLAYING':
            live_status = 'is_live'
        else:
            live_status = None
            self.report_warning(f'unknown status: {status}')

        start_time = rfc3339_to_atende(params['start'])
        # A running stream only advertises the *expected* end of the session,
        # and the real end may move while it is on air. Passing that value as
        # a stop time would cut the stream off early, so the stop time and the
        # duration are only filled in for finished transmissions.
        stop_time = duration = None
        if live_status == 'was_live':
            stop_time = rfc3339_to_atende(params['stop'])
            duration = (stop_time - start_time) // 1000

        entries = []

        def add_entry(file, legacy_file=False):
            """Append a playlist entry for ``file``; falsy URLs are ignored."""
            if not file:
                return
            file = self._proto_relative_url(file)

            if legacy_file:
                # Plain media file: used directly, labelled with the page's id and title.
                entries.append({
                    'url': file,
                    'duration': duration,
                    'id': video_id,
                    'title': title,
                })
                return

            # livx stream: attach the time window and delegate to RedCDNLivxIE.
            file = update_url_query(file, {'startTime': start_time})
            if stop_time is not None:
                file = update_url_query(file, {'stopTime': stop_time})
            stream_id = self._search_regex(r'/o2/sejm/([^/]+)/[^./]+\.livx', file, 'stream id')
            entries.append({
                'url': file,
                'duration': duration,
                '_type': 'url_transparent',
                'ie_key': RedCDNLivxIE.ie_key(),
                'id': stream_id,
                'title': join_nonempty(title, stream_id, delim=' - '),
            })

        cameras = self._search_json(
            r'var\s+cameras\s*=', frame, 'camera list', video_id,
            contains_pattern=r'\[(?s:.+)\]', transform_source=js_to_json,
            fatal=False) or []
        for sources in traverse_obj(cameras, (..., 'file', {dict})):
            flv_url = sources.get('flv')
            mp4_url = sources.get('mp4')
            if flv_url:
                add_entry(flv_url)
            elif mp4_url:
                # Only the 7th term has these: nothing was streamed before it,
                # and from the 8th term on everything is redcdn livx.
                add_entry(mp4_url, legacy_file=True)
            else:
                self.report_warning('Unknown camera stream type found')

        if params.get('mig'):
            interpreter_url = self._search_regex(
                r"var sliUrl\s*=\s*'([^']+)'", frame, 'sign language interpreter url', fatal=False)
            add_entry(interpreter_url)

        return {
            '_type': 'playlist',
            'entries': entries,
            'id': video_id,
            'title': title,
            'description': clean_html(data.get('desc')) or None,
            'duration': duration,
            'live_status': live_status,
            'location': strip_or_none(data.get('location')),
        }