jfr.im git - yt-dlp.git/blame_incremental - yt

... / ...

Commit	Line	Data
	1	import datetime as dt
	2
	3	from .common import InfoExtractor
	4	from .redge import RedCDNLivxIE
	5	from ..utils import (
	6	clean_html,
	7	join_nonempty,
	8	js_to_json,
	9	strip_or_none,
	10	update_url_query,
	11	)
	12	from ..utils.traversal import traverse_obj
	13
	14
	15	def is_dst(date):
	16	last_march = dt.datetime(date.year, 3, 31)
	17	last_october = dt.datetime(date.year, 10, 31)
	18	last_sunday_march = last_march - dt.timedelta(days=last_march.isoweekday() % 7)
	19	last_sunday_october = last_october - dt.timedelta(days=last_october.isoweekday() % 7)
	20	return last_sunday_march.replace(hour=2) <= date <= last_sunday_october.replace(hour=3)
	21
	22
	23	def rfc3339_to_atende(date):
	24	date = dt.datetime.fromisoformat(date)
	25	date = date + dt.timedelta(hours=1 if is_dst(date) else 0)
	26	return int((date.timestamp() - 978307200) * 1000)
	27
	28
	29	class SejmIE(InfoExtractor):
	30	_VALID_URL = (
	31	r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp(?:\?[^#]*)?#(?P<id>[\dA-F]+)',
	32	r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp\?(?:[^#]+&)?unid=(?P<id>[\dA-F]+)',
	33	r'https?://sejm-embed\.redcdn\.pl/[Ss]ejm(?P<term>\d+)\.nsf/VideoFrame\.xsp/(?P<id>[\dA-F]+)',
	34	)
	35	IE_NAME = 'sejm'
	36
	37	_TESTS = [{
	38	# multiple cameras, polish SL iterpreter
	39	'url': 'https://www.sejm.gov.pl/Sejm10.nsf/transmisje_arch.xsp#6181EF1AD9CEEBB5C1258A6D006452B5',
	40	'info_dict': {
	41	'id': '6181EF1AD9CEEBB5C1258A6D006452B5',
	42	'title': '1. posiedzenie Sejmu X kadencji',
	43	'duration': 20145,
	44	'live_status': 'was_live',
	45	'location': 'Sala Posiedzeń',
	46	},
	47	'playlist': [{
	48	'info_dict': {
	49	'id': 'ENC01-722340000000-722360145000',
	50	'ext': 'mp4',
	51	'duration': 20145,
	52	'title': '1. posiedzenie Sejmu X kadencji - ENC01',
	53	'live_status': 'was_live',
	54	},
	55	}, {
	56	'info_dict': {
	57	'id': 'ENC30-722340000000-722360145000',
	58	'ext': 'mp4',
	59	'duration': 20145,
	60	'title': '1. posiedzenie Sejmu X kadencji - ENC30',
	61	'live_status': 'was_live',
	62	},
	63	}, {
	64	'info_dict': {
	65	'id': 'ENC31-722340000000-722360145000',
	66	'ext': 'mp4',
	67	'duration': 20145,
	68	'title': '1. posiedzenie Sejmu X kadencji - ENC31',
	69	'live_status': 'was_live',
	70	},
	71	}, {
	72	'info_dict': {
	73	'id': 'ENC32-722340000000-722360145000',
	74	'ext': 'mp4',
	75	'duration': 20145,
	76	'title': '1. posiedzenie Sejmu X kadencji - ENC32',
	77	'live_status': 'was_live',
	78	},
	79	}, {
	80	# sign lang interpreter
	81	'info_dict': {
	82	'id': 'Migacz-ENC01-1-722340000000-722360145000',
	83	'ext': 'mp4',
	84	'duration': 20145,
	85	'title': '1. posiedzenie Sejmu X kadencji - Migacz-ENC01',
	86	'live_status': 'was_live',
	87	},
	88	}],
	89	}, {
	90	'url': 'https://www.sejm.gov.pl/Sejm8.nsf/transmisje.xsp?unid=9377A9D65518E9A5C125808E002E9FF2',
	91	'info_dict': {
	92	'id': '9377A9D65518E9A5C125808E002E9FF2',
	93	'title': 'Debata "Lepsza Polska: obywatelska"',
	94	'description': 'KP .Nowoczesna',
	95	'duration': 8770,
	96	'live_status': 'was_live',
	97	'location': 'sala kolumnowa im. Kazimierza Pużaka (bud. C-D)',
	98	},
	99	'playlist': [{
	100	'info_dict': {
	101	'id': 'ENC08-1-503831270000-503840040000',
	102	'ext': 'mp4',
	103	'duration': 8770,
	104	'title': 'Debata "Lepsza Polska: obywatelska" - ENC08',
	105	'live_status': 'was_live',
	106	},
	107	}],
	108	}, {
	109	# 7th term is very special, since it does not use redcdn livx
	110	'url': 'https://www.sejm.gov.pl/sejm7.nsf/transmisje_arch.xsp?rok=2015&month=11#A6E6D475ECCC6FE5C1257EF90034817F',
	111	'info_dict': {
	112	'id': 'A6E6D475ECCC6FE5C1257EF90034817F',
	113	'title': 'Konferencja prasowa - Stanowisko SLD ws. składu nowego rządu',
	114	'description': 'SLD - Biuro Prasowe Klubu',
	115	'duration': 514,
	116	'location': 'sala 101/bud. C',
	117	'live_status': 'was_live',
	118	},
	119	'playlist': [{
	120	'info_dict': {
	121	'id': 'A6E6D475ECCC6FE5C1257EF90034817F',
	122	'ext': 'mp4',
	123	'title': 'Konferencja prasowa - Stanowisko SLD ws. składu nowego rządu',
	124	'duration': 514,
	125	},
	126	}],
	127	}, {
	128	'url': 'https://sejm-embed.redcdn.pl/Sejm10.nsf/VideoFrame.xsp/FED58EABB97FBD53C1258A7400386492',
	129	'only_matching': True,
	130	}]
	131
	132	def _real_extract(self, url):
	133	term, video_id = self._match_valid_url(url).group('term', 'id')
	134	frame = self._download_webpage(
	135	f'https://sejm-embed.redcdn.pl/Sejm{term}.nsf/VideoFrame.xsp/{video_id}',
	136	video_id)
	137	# despite it says "transmisje_arch", it works for live streams too!
	138	data = self._download_json(
	139	f'https://www.sejm.gov.pl/Sejm{term}.nsf/transmisje_arch.xsp/json/{video_id}',
	140	video_id)
	141	params = data['params']
	142
	143	title = strip_or_none(data.get('title'))
	144
	145	if data.get('status') == 'VIDEO_ENDED':
	146	live_status = 'was_live'
	147	elif data.get('status') == 'VIDEO_PLAYING':
	148	live_status = 'is_live'
	149	else:
	150	live_status = None
	151	self.report_warning(f'unknown status: {data.get("status")}')
	152
	153	start_time = rfc3339_to_atende(params['start'])
	154	# current streams have a stop time of expected end of session, but actual times
	155	# can change during the transmission. setting a stop_time would artificially
	156	# end the stream at that time, while the session actually keeps going.
	157	if live_status == 'was_live':
	158	stop_time = rfc3339_to_atende(params['stop'])
	159	duration = (stop_time - start_time) // 1000
	160	else:
	161	stop_time, duration = None, None
	162
	163	entries = []
	164
	165	def add_entry(file, legacy_file=False):
	166	if not file:
	167	return
	168	file = self._proto_relative_url(file)
	169	if not legacy_file:
	170	file = update_url_query(file, {'startTime': start_time})
	171	if stop_time is not None:
	172	file = update_url_query(file, {'stopTime': stop_time})
	173	stream_id = self._search_regex(r'/o2/sejm/([^/]+)/[^./]+\.livx', file, 'stream id')
	174	common_info = {
	175	'url': file,
	176	'duration': duration,
	177	}
	178	if legacy_file:
	179	entries.append({
	180	**common_info,
	181	'id': video_id,
	182	'title': title,
	183	})
	184	else:
	185	entries.append({
	186	**common_info,
	187	'_type': 'url_transparent',
	188	'ie_key': RedCDNLivxIE.ie_key(),
	189	'id': stream_id,
	190	'title': join_nonempty(title, stream_id, delim=' - '),
	191	})
	192
	193	cameras = self._search_json(
	194	r'var\s+cameras\s*=', frame, 'camera list', video_id,
	195	contains_pattern=r'\[(?s:.+)\]', transform_source=js_to_json,
	196	fatal=False) or []
	197	for camera_file in traverse_obj(cameras, (..., 'file', {dict})):
	198	if camera_file.get('flv'):
	199	add_entry(camera_file['flv'])
	200	elif camera_file.get('mp4'):
	201	# this is only a thing in 7th term. no streams before, and starting 8th it's redcdn livx
	202	add_entry(camera_file['mp4'], legacy_file=True)
	203	else:
	204	self.report_warning('Unknown camera stream type found')
	205
	206	if params.get('mig'):
	207	add_entry(self._search_regex(r"var sliUrl\s=\s'([^']+)'", frame, 'sign language interpreter url', fatal=False))
	208
	209	return {
	210	'_type': 'playlist',
	211	'entries': entries,
	212	'id': video_id,
	213	'title': title,
	214	'description': clean_html(data.get('desc')) or None,
	215	'duration': duration,
	216	'live_status': live_status,
	217	'location': strip_or_none(data.get('location')),
	218	}