jfr.im git - yt-dlp.git/blame_incremental - yt

... / ...

Commit	Line	Data
	1	import json
	2	import re
	3	import time
	4	import urllib.parse
	5	import uuid
	6
	7	from .common import InfoExtractor
	8	from ..utils import (
	9	ExtractorError,
	10	float_or_none,
	11	int_or_none,
	12	strip_or_none,
	13	traverse_obj,
	14	unified_timestamp,
	15	)
	16
	17
	18	class RedBeeBaseIE(InfoExtractor):
	19	_DEVICE_ID = str(uuid.uuid4())
	20
	21	@property
	22	def _API_URL(self):
	23	"""
	24	Ref: https://apidocs.emp.ebsd.ericsson.net
	25	Subclasses must set _REDBEE_CUSTOMER, _REDBEE_BUSINESS_UNIT
	26	"""
	27	return f'https://exposure.api.redbee.live/v2/customer/{self._REDBEE_CUSTOMER}/businessunit/{self._REDBEE_BUSINESS_UNIT}'
	28
	29	def _get_bearer_token(self, asset_id, jwt=None):
	30	request = {
	31	'deviceId': self._DEVICE_ID,
	32	'device': {
	33	'deviceId': self._DEVICE_ID,
	34	'name': 'Mozilla Firefox 102',
	35	'type': 'WEB',
	36	},
	37	}
	38	if jwt:
	39	request['jwt'] = jwt
	40
	41	return self._download_json(
	42	f'{self._API_URL}/auth/{"gigyaLogin" if jwt else "anonymous"}',
	43	asset_id, data=json.dumps(request).encode('utf-8'), headers={
	44	'Content-Type': 'application/json;charset=utf-8'
	45	})['sessionToken']
	46
	47	def _get_formats_and_subtitles(self, asset_id, **kwargs):
	48	bearer_token = self._get_bearer_token(asset_id, **kwargs)
	49	api_response = self._download_json(
	50	f'{self._API_URL}/entitlement/{asset_id}/play',
	51	asset_id, headers={
	52	'Authorization': f'Bearer {bearer_token}',
	53	'Accept': 'application/json, text/plain, /'
	54	})
	55
	56	formats, subtitles = [], {}
	57	for format in api_response['formats']:
	58	if not format.get('mediaLocator'):
	59	continue
	60
	61	fmts, subs = [], {}
	62	if format.get('format') == 'DASH':
	63	fmts, subs = self._extract_mpd_formats_and_subtitles(
	64	format['mediaLocator'], asset_id, fatal=False)
	65	elif format.get('format') == 'SMOOTHSTREAMING':
	66	fmts, subs = self._extract_ism_formats_and_subtitles(
	67	format['mediaLocator'], asset_id, fatal=False)
	68	elif format.get('format') == 'HLS':
	69	fmts, subs = self._extract_m3u8_formats_and_subtitles(
	70	format['mediaLocator'], asset_id, fatal=False)
	71
	72	formats.extend(fmts)
	73	self._merge_subtitles(subs, target=subtitles)
	74
	75	return formats, subtitles
	76
	77
	78	class ParliamentLiveUKIE(RedBeeBaseIE):
	79	IE_NAME = 'parliamentlive.tv'
	80	IE_DESC = 'UK parliament videos'
	81	_VALID_URL = r'(?i)https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
	82
	83	_REDBEE_CUSTOMER = 'UKParliament'
	84	_REDBEE_BUSINESS_UNIT = 'ParliamentLive'
	85
	86	_TESTS = [{
	87	'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
	88	'info_dict': {
	89	'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
	90	'ext': 'mp4',
	91	'title': 'Home Affairs Committee',
	92	'timestamp': 1395153872,
	93	'upload_date': '20140318',
	94	'thumbnail': r're:https?://[^?#]+c1e9d44d-fd6c-4263-b50f-97ed26cc998b[^/]*/thumbnail',
	95	},
	96	}, {
	97	'url': 'http://parliamentlive.tv/event/index/3f24936f-130f-40bf-9a5d-b3d6479da6a4',
	98	'only_matching': True,
	99	}, {
	100	'url': 'https://parliamentlive.tv/Event/Index/27cf25e4-e77b-42a3-93c5-c815cd6d7377',
	101	'info_dict': {
	102	'id': '27cf25e4-e77b-42a3-93c5-c815cd6d7377',
	103	'ext': 'mp4',
	104	'title': 'House of Commons',
	105	'timestamp': 1658392447,
	106	'upload_date': '20220721',
	107	'thumbnail': r're:https?://[^?#]+27cf25e4-e77b-42a3-93c5-c815cd6d7377[^/]*/thumbnail',
	108	},
	109	}]
	110
	111	def _real_extract(self, url):
	112	video_id = self._match_id(url)
	113
	114	formats, subtitles = self._get_formats_and_subtitles(video_id)
	115	self._sort_formats(formats)
	116
	117	video_info = self._download_json(
	118	f'https://www.parliamentlive.tv/Event/GetShareVideo/{video_id}', video_id, fatal=False)
	119
	120	self._sort_formats(formats, ['res', 'proto'])
	121
	122	return {
	123	'id': video_id,
	124	'formats': formats,
	125	'subtitles': subtitles,
	126	'title': traverse_obj(video_info, ('event', 'title')),
	127	'thumbnail': traverse_obj(video_info, 'thumbnailUrl'),
	128	'timestamp': traverse_obj(
	129	video_info, ('event', 'publishedStartTime'), expected_type=unified_timestamp),
	130	}
	131
	132
	133	class RTBFIE(RedBeeBaseIE):
	134	_VALID_URL = r'''(?x)
	135	https?://(?:www\.)?rtbf\.be/
	136	(?:
	137	video/[^?]+\?.*\bid=\|
	138	ouftivi/(?:[^/]+/)[^?]+\?.\bvideoId=\|
	139	auvio/[^/]+\?.*\b(?P<live>l)?id=
	140	)(?P<id>\d+)'''
	141	_NETRC_MACHINE = 'rtbf'
	142
	143	_REDBEE_CUSTOMER = 'RTBF'
	144	_REDBEE_BUSINESS_UNIT = 'Auvio'
	145
	146	_TESTS = [{
	147	'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
	148	'md5': '8c876a1cceeb6cf31b476461ade72384',
	149	'info_dict': {
	150	'id': '1921274',
	151	'ext': 'mp4',
	152	'title': 'Les Diables au coeur (épisode 2)',
	153	'description': '(du 25/04/2014)',
	154	'duration': 3099.54,
	155	'upload_date': '20140425',
	156	'timestamp': 1398456300,
	157	},
	158	'skip': 'No longer available',
	159	}, {
	160	# geo restricted
	161	'url': 'http://www.rtbf.be/ouftivi/heros/detail_scooby-doo-mysteres-associes?id=1097&videoId=2057442',
	162	'only_matching': True,
	163	}, {
	164	'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858',
	165	'only_matching': True,
	166	}, {
	167	'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996',
	168	'only_matching': True,
	169	}, {
	170	# Live
	171	'url': 'https://www.rtbf.be/auvio/direct_pure-fm?lid=134775',
	172	'only_matching': True,
	173	}, {
	174	# Audio
	175	'url': 'https://www.rtbf.be/auvio/detail_cinq-heures-cinema?id=2360811',
	176	'only_matching': True,
	177	}, {
	178	# With Subtitle
	179	'url': 'https://www.rtbf.be/auvio/detail_les-carnets-du-bourlingueur?id=2361588',
	180	'only_matching': True,
	181	}, {
	182	'url': 'https://www.rtbf.be/auvio/detail_investigation?id=2921926',
	183	'md5': 'd5d11bb62169fef38d7ce7ac531e034f',
	184	'info_dict': {
	185	'id': '2921926',
	186	'ext': 'mp4',
	187	'title': 'Le handicap un confinement perpétuel - Maladie de Lyme',
	188	'description': 'md5:dcbd5dcf6015488c9069b057c15ccc52',
	189	'duration': 5258.8,
	190	'upload_date': '20220727',
	191	'timestamp': 1658934000,
	192	'series': '#Investigation',
	193	'thumbnail': r're:^https?://[^?&]+\.jpg$',
	194	},
	195	}, {
	196	'url': 'https://www.rtbf.be/auvio/detail_la-belgique-criminelle?id=2920492',
	197	'md5': '054f9f143bc79c89647c35e5a7d35fa8',
	198	'info_dict': {
	199	'id': '2920492',
	200	'ext': 'mp4',
	201	'title': '04 - Le crime de la rue Royale',
	202	'description': 'md5:0c3da1efab286df83f2ab3f8f96bd7a6',
	203	'duration': 1574.6,
	204	'upload_date': '20220723',
	205	'timestamp': 1658596887,
	206	'series': 'La Belgique criminelle - TV',
	207	'thumbnail': r're:^https?://[^?&]+\.jpg$',
	208	},
	209	}]
	210
	211	_IMAGE_HOST = 'http://ds1.ds.static.rtbf.be'
	212	_PROVIDERS = {
	213	'YOUTUBE': 'Youtube',
	214	'DAILYMOTION': 'Dailymotion',
	215	'VIMEO': 'Vimeo',
	216	}
	217	_QUALITIES = [
	218	('mobile', 'SD'),
	219	('web', 'MD'),
	220	('high', 'HD'),
	221	]
	222	_LOGIN_URL = 'https://login.rtbf.be/accounts.login'
	223	_GIGYA_API_KEY = '3_kWKuPgcdAybqnqxq_MvHVk0-6PN8Zk8pIIkJM_yXOu-qLPDDsGOtIDFfpGivtbeO'
	224	_LOGIN_COOKIE_ID = f'glt_{_GIGYA_API_KEY}'
	225
	226	def _perform_login(self, username, password):
	227	if self._get_cookies(self._LOGIN_URL).get(self._LOGIN_COOKIE_ID):
	228	return
	229
	230	self._set_cookie('.rtbf.be', 'gmid', 'gmid.ver4', secure=True, expire_time=time.time() + 3600)
	231
	232	login_response = self._download_json(
	233	self._LOGIN_URL, None, data=urllib.parse.urlencode({
	234	'loginID': username,
	235	'password': password,
	236	'APIKey': self._GIGYA_API_KEY,
	237	'targetEnv': 'jssdk',
	238	'sessionExpiration': '-2',
	239	}).encode('utf-8'), headers={
	240	'Content-Type': 'application/x-www-form-urlencoded',
	241	})
	242
	243	if login_response['statusCode'] != 200:
	244	raise ExtractorError('Login failed. Server message: %s' % login_response['errorMessage'], expected=True)
	245
	246	self._set_cookie('.rtbf.be', self._LOGIN_COOKIE_ID, login_response['sessionInfo']['login_token'],
	247	secure=True, expire_time=time.time() + 3600)
	248
	249	def _get_formats_and_subtitles(self, url, media_id):
	250	login_token = self._get_cookies(url).get(self._LOGIN_COOKIE_ID)
	251	if not login_token:
	252	self.raise_login_required()
	253
	254	session_jwt = self._download_json(
	255	'https://login.rtbf.be/accounts.getJWT', media_id, query={
	256	'login_token': login_token.value,
	257	'APIKey': self._GIGYA_API_KEY,
	258	'sdk': 'js_latest',
	259	'authMode': 'cookie',
	260	'pageURL': url,
	261	'sdkBuild': '13273',
	262	'format': 'json',
	263	})['id_token']
	264
	265	return super()._get_formats_and_subtitles(media_id, jwt=session_jwt)
	266
	267	def _real_extract(self, url):
	268	live, media_id = self._match_valid_url(url).groups()
	269	embed_page = self._download_webpage(
	270	'https://www.rtbf.be/auvio/embed/' + ('direct' if live else 'media'),
	271	media_id, query={'id': media_id})
	272	data = self._parse_json(self._html_search_regex(
	273	r'data-media="([^"]+)"', embed_page, 'media data'), media_id)
	274
	275	error = data.get('error')
	276	if error:
	277	raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
	278
	279	provider = data.get('provider')
	280	if provider in self._PROVIDERS:
	281	return self.url_result(data['url'], self._PROVIDERS[provider])
	282
	283	title = data['subtitle']
	284	is_live = data.get('isLive')
	285	height_re = r'-(\d+)p\.'
	286	formats = []
	287
	288	m3u8_url = data.get('urlHlsAes128') or data.get('urlHls')
	289	if m3u8_url:
	290	formats.extend(self._extract_m3u8_formats(
	291	m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
	292
	293	fix_url = lambda x: x.replace('//rtbf-vod.', '//rtbf.') if '/geo/drm/' in x else x
	294	http_url = data.get('url')
	295	if formats and http_url and re.search(height_re, http_url):
	296	http_url = fix_url(http_url)
	297	for m3u8_f in formats[:]:
	298	height = m3u8_f.get('height')
	299	if not height:
	300	continue
	301	f = m3u8_f.copy()
	302	del f['protocol']
	303	f.update({
	304	'format_id': m3u8_f['format_id'].replace('hls-', 'http-'),
	305	'url': re.sub(height_re, '-%dp.' % height, http_url),
	306	})
	307	formats.append(f)
	308	else:
	309	sources = data.get('sources') or {}
	310	for key, format_id in self._QUALITIES:
	311	format_url = sources.get(key)
	312	if not format_url:
	313	continue
	314	height = int_or_none(self._search_regex(
	315	height_re, format_url, 'height', default=None))
	316	formats.append({
	317	'format_id': format_id,
	318	'url': fix_url(format_url),
	319	'height': height,
	320	})
	321
	322	mpd_url = data.get('urlDash')
	323	if mpd_url and (self.get_param('allow_unplayable_formats') or not data.get('drm')):
	324	formats.extend(self._extract_mpd_formats(
	325	mpd_url, media_id, mpd_id='dash', fatal=False))
	326
	327	audio_url = data.get('urlAudio')
	328	if audio_url:
	329	formats.append({
	330	'format_id': 'audio',
	331	'url': audio_url,
	332	'vcodec': 'none',
	333	})
	334
	335	subtitles = {}
	336	for track in (data.get('tracks') or {}).values():
	337	sub_url = track.get('url')
	338	if not sub_url:
	339	continue
	340	subtitles.setdefault(track.get('lang') or 'fr', []).append({
	341	'url': sub_url,
	342	})
	343
	344	if not formats:
	345	fmts, subs = self._get_formats_and_subtitles(url, media_id)
	346	formats.extend(fmts)
	347	self._merge_subtitles(subs, target=subtitles)
	348
	349	self._sort_formats(formats, ['res', 'proto'])
	350	return {
	351	'id': media_id,
	352	'formats': formats,
	353	'title': title,
	354	'description': strip_or_none(data.get('description')),
	355	'thumbnail': data.get('thumbnail'),
	356	'duration': float_or_none(data.get('realDuration')),
	357	'timestamp': int_or_none(data.get('liveFrom')),
	358	'series': data.get('programLabel'),
	359	'subtitles': subtitles,
	360	'is_live': is_live,
	361	}