jfr.im git - yt-dlp.git/blame_incremental - yt

... / ...

Commit	Line	Data
	1	import hashlib
	2	import hmac
	3	import json
	4	import re
	5	import time
	6	import uuid
	7
	8	from .common import InfoExtractor
	9	from ..compat import compat_HTTPError, compat_str
	10	from ..utils import (
	11	ExtractorError,
	12	determine_ext,
	13	int_or_none,
	14	join_nonempty,
	15	str_or_none,
	16	traverse_obj,
	17	url_or_none,
	18	)
	19
	20
	21	class HotStarBaseIE(InfoExtractor):
	22	_BASE_URL = 'https://www.hotstar.com'
	23	_API_URL = 'https://api.hotstar.com'
	24	_AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'
	25
	26	def _call_api_v1(self, path, args, *kwargs):
	27	return self._download_json(
	28	f'{self._API_URL}/o/v1/{path}', args, *kwargs,
	29	headers={'x-country-code': 'IN', 'x-platform-code': 'PCTV'})
	30
	31	def _call_api_impl(self, path, video_id, query, st=None, cookies=None):
	32	st = int_or_none(st) or int(time.time())
	33	exp = st + 6000
	34	auth = 'st=%d~exp=%d~acl=/*' % (st, exp)
	35	auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest()
	36
	37	if cookies and cookies.get('userUP'):
	38	token = cookies.get('userUP').value
	39	else:
	40	token = self._download_json(
	41	f'{self._API_URL}/um/v3/users',
	42	video_id, note='Downloading token',
	43	data=json.dumps({"device_ids": [{"id": compat_str(uuid.uuid4()), "type": "device_id"}]}).encode('utf-8'),
	44	headers={
	45	'hotstarauth': auth,
	46	'x-hs-platform': 'PCTV', # or 'web'
	47	'Content-Type': 'application/json',
	48	})['user_identity']
	49
	50	response = self._download_json(
	51	f'{self._API_URL}/{path}', video_id, query=query,
	52	headers={
	53	'hotstarauth': auth,
	54	'x-hs-appversion': '6.72.2',
	55	'x-hs-platform': 'web',
	56	'x-hs-usertoken': token,
	57	})
	58
	59	if response['message'] != "Playback URL's fetched successfully":
	60	raise ExtractorError(
	61	response['message'], expected=True)
	62	return response['data']
	63
	64	def _call_api_v2(self, path, video_id, st=None, cookies=None):
	65	return self._call_api_impl(
	66	f'{path}/content/{video_id}', video_id, st=st, cookies=cookies, query={
	67	'desired-config': 'audio_channel:stereo\|container:fmp4\|dynamic_range:hdr\|encryption:plain\|ladder:tv\|package:dash\|resolution:fhd\|subs-tag:HotstarVIP\|video_codec:h265',
	68	'device-id': cookies.get('device_id').value if cookies.get('device_id') else compat_str(uuid.uuid4()),
	69	'os-name': 'Windows',
	70	'os-version': '10',
	71	})
	72
	73	def _playlist_entries(self, path, item_id, root=None, **kwargs):
	74	results = self._call_api_v1(path, item_id, **kwargs)['body']['results']
	75	for video in traverse_obj(results, (('assets', None), 'items', ...)):
	76	if video.get('contentId'):
	77	yield self.url_result(
	78	HotStarIE._video_url(video['contentId'], root=root), HotStarIE, video['contentId'])
	79
	80
	81	class HotStarIE(HotStarBaseIE):
	82	IE_NAME = 'hotstar'
	83	_VALID_URL = r'''(?x)
	84	https?://(?:www\.)?hotstar\.com(?:/in)?/(?!in/)
	85	(?:
	86	(?P<type>movies\|sports\|episode\|(?P<tv>tv\|shows))/
	87	(?(tv)(?:[^/?#]+/){2}\|[^?#]*)
	88	)?
	89	[^/?#]+/
	90	(?P<id>\d{10})
	91	'''
	92
	93	_TESTS = [{
	94	'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273',
	95	'info_dict': {
	96	'id': '1000076273',
	97	'ext': 'mp4',
	98	'title': 'Can You Not Spread Rumours?',
	99	'description': 'md5:c957d8868e9bc793ccb813691cc4c434',
	100	'timestamp': 1447248600,
	101	'upload_date': '20151111',
	102	'duration': 381,
	103	'episode': 'Can You Not Spread Rumours?',
	104	},
	105	'params': {'skip_download': 'm3u8'},
	106	}, {
	107	'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847',
	108	'info_dict': {
	109	'id': '1000234847',
	110	'ext': 'mp4',
	111	'title': 'Janhvi Targets Suman',
	112	'description': 'md5:78a85509348910bd1ca31be898c5796b',
	113	'timestamp': 1556670600,
	114	'upload_date': '20190501',
	115	'duration': 1219,
	116	'channel': 'StarPlus',
	117	'channel_id': 3,
	118	'series': 'Ek Bhram - Sarvagun Sampanna',
	119	'season': 'Chapter 1',
	120	'season_number': 1,
	121	'season_id': 6771,
	122	'episode': 'Janhvi Targets Suman',
	123	'episode_number': 8,
	124	}
	125	}, {
	126	'url': 'https://www.hotstar.com/in/shows/anupama/1260022017/anupama-anuj-share-a-moment/1000282843',
	127	'info_dict': {
	128	'id': '1000282843',
	129	'ext': 'mp4',
	130	'title': 'Anupama, Anuj Share a Moment',
	131	'season': 'Chapter 1',
	132	'description': 'md5:8d74ed2248423b8b06d5c8add4d7a0c0',
	133	'timestamp': 1678149000,
	134	'channel': 'StarPlus',
	135	'series': 'Anupama',
	136	'season_number': 1,
	137	'season_id': 7399,
	138	'upload_date': '20230307',
	139	'episode': 'Anupama, Anuj Share a Moment',
	140	'episode_number': 853,
	141	'duration': 1272,
	142	'channel_id': 3,
	143	},
	144	}, {
	145	'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157',
	146	'only_matching': True,
	147	}, {
	148	'url': 'https://www.hotstar.com/in/sports/cricket/follow-the-blues-2021/recap-eng-fight-back-on-day-2/1260066104',
	149	'only_matching': True,
	150	}, {
	151	'url': 'https://www.hotstar.com/in/sports/football/most-costly-pl-transfers-ft-grealish/1260065956',
	152	'only_matching': True,
	153	}]
	154	_GEO_BYPASS = False
	155
	156	_TYPE = {
	157	'movies': 'movie',
	158	'sports': 'match',
	159	'episode': 'episode',
	160	'tv': 'episode',
	161	'shows': 'episode',
	162	None: 'content',
	163	}
	164
	165	_IGNORE_MAP = {
	166	'res': 'resolution',
	167	'vcodec': 'video_codec',
	168	'dr': 'dynamic_range',
	169	}
	170
	171	_TAG_FIELDS = {
	172	'language': 'language',
	173	'acodec': 'audio_codec',
	174	'vcodec': 'video_codec',
	175	}
	176
	177	@classmethod
	178	def _video_url(cls, video_id, video_type=None, *, slug='ignore_me', root=None):
	179	assert None in (video_type, root)
	180	if not root:
	181	root = join_nonempty(cls._BASE_URL, video_type, delim='/')
	182	return f'{root}/{slug}/{video_id}'
	183
	184	def _real_extract(self, url):
	185	video_id, video_type = self._match_valid_url(url).group('id', 'type')
	186	video_type = self._TYPE.get(video_type, video_type)
	187	cookies = self._get_cookies(url) # Cookies before any request
	188
	189	video_data = self._call_api_v1(f'{video_type}/detail', video_id,
	190	query={'tas': 10000, 'contentId': video_id})['body']['results']['item']
	191	if not self.get_param('allow_unplayable_formats') and video_data.get('drmProtected'):
	192	self.report_drm(video_id)
	193
	194	# See https://github.com/yt-dlp/yt-dlp/issues/396
	195	st = self._download_webpage_handle(f'{self._BASE_URL}/in', video_id)[1].headers.get('x-origin-date')
	196
	197	geo_restricted = False
	198	formats, subs = [], {}
	199	headers = {'Referer': f'{self._BASE_URL}/in'}
	200
	201	# change to v2 in the future
	202	playback_sets = self._call_api_v2('play/v1/playback', video_id, st=st, cookies=cookies)['playBackSets']
	203	for playback_set in playback_sets:
	204	if not isinstance(playback_set, dict):
	205	continue
	206	tags = str_or_none(playback_set.get('tagsCombination')) or ''
	207	if any(f'{prefix}:{ignore}' in tags
	208	for key, prefix in self._IGNORE_MAP.items()
	209	for ignore in self._configuration_arg(key)):
	210	continue
	211	tag_dict = dict((t.split(':', 1) + [None])[:2] for t in tags.split(';'))
	212
	213	format_url = url_or_none(playback_set.get('playbackUrl'))
	214	if not format_url:
	215	continue
	216	format_url = re.sub(r'(?<=//staragvod)(\d)', r'web\1', format_url)
	217	ext = determine_ext(format_url)
	218
	219	current_formats, current_subs = [], {}
	220	try:
	221	if 'package:hls' in tags or ext == 'm3u8':
	222	current_formats, current_subs = self._extract_m3u8_formats_and_subtitles(
	223	format_url, video_id, ext='mp4', headers=headers)
	224	elif 'package:dash' in tags or ext == 'mpd':
	225	current_formats, current_subs = self._extract_mpd_formats_and_subtitles(
	226	format_url, video_id, headers=headers)
	227	elif ext == 'f4m':
	228	pass # XXX: produce broken files
	229	else:
	230	current_formats = [{
	231	'url': format_url,
	232	'width': int_or_none(playback_set.get('width')),
	233	'height': int_or_none(playback_set.get('height')),
	234	}]
	235	except ExtractorError as e:
	236	if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
	237	geo_restricted = True
	238	continue
	239
	240	if tag_dict.get('encryption') not in ('plain', None):
	241	for f in current_formats:
	242	f['has_drm'] = True
	243	for f in current_formats:
	244	for k, v in self._TAG_FIELDS.items():
	245	if not f.get(k):
	246	f[k] = tag_dict.get(v)
	247	if f.get('vcodec') != 'none' and not f.get('dynamic_range'):
	248	f['dynamic_range'] = tag_dict.get('dynamic_range')
	249	if f.get('acodec') != 'none' and not f.get('audio_channels'):
	250	f['audio_channels'] = {
	251	'stereo': 2,
	252	'dolby51': 6,
	253	}.get(tag_dict.get('audio_channel'))
	254	f['format_note'] = join_nonempty(
	255	tag_dict.get('ladder'),
	256	tag_dict.get('audio_channel') if f.get('acodec') != 'none' else None,
	257	f.get('format_note'),
	258	delim=', ')
	259
	260	formats.extend(current_formats)
	261	subs = self._merge_subtitles(subs, current_subs)
	262
	263	if not formats and geo_restricted:
	264	self.raise_geo_restricted(countries=['IN'], metadata_available=True)
	265	self._remove_duplicate_formats(formats)
	266	for f in formats:
	267	f.setdefault('http_headers', {}).update(headers)
	268
	269	return {
	270	'id': video_id,
	271	'title': video_data.get('title'),
	272	'description': video_data.get('description'),
	273	'duration': int_or_none(video_data.get('duration')),
	274	'timestamp': int_or_none(traverse_obj(video_data, 'broadcastDate', 'startDate')),
	275	'formats': formats,
	276	'subtitles': subs,
	277	'channel': video_data.get('channelName'),
	278	'channel_id': video_data.get('channelId'),
	279	'series': video_data.get('showName'),
	280	'season': video_data.get('seasonName'),
	281	'season_number': int_or_none(video_data.get('seasonNo')),
	282	'season_id': video_data.get('seasonId'),
	283	'episode': video_data.get('title'),
	284	'episode_number': int_or_none(video_data.get('episodeNo')),
	285	}
	286
	287
	288	class HotStarPrefixIE(InfoExtractor):
	289	""" The "hotstar:" prefix is no longer in use, but this is kept for backward compatibility """
	290	IE_DESC = False
	291	_VALID_URL = r'hotstar:(?:(?P<type>\w+):)?(?P<id>\d+)$'
	292	_TESTS = [{
	293	'url': 'hotstar:1000076273',
	294	'only_matching': True,
	295	}, {
	296	'url': 'hotstar:movies:1260009879',
	297	'info_dict': {
	298	'id': '1260009879',
	299	'ext': 'mp4',
	300	'title': 'Nuvvu Naaku Nachav',
	301	'description': 'md5:d43701b1314e6f8233ce33523c043b7d',
	302	'timestamp': 1567525674,
	303	'upload_date': '20190903',
	304	'duration': 10787,
	305	'episode': 'Nuvvu Naaku Nachav',
	306	},
	307	}, {
	308	'url': 'hotstar:episode:1000234847',
	309	'only_matching': True,
	310	}, {
	311	# contentData
	312	'url': 'hotstar:sports:1260065956',
	313	'only_matching': True,
	314	}, {
	315	# contentData
	316	'url': 'hotstar:sports:1260066104',
	317	'only_matching': True,
	318	}]
	319
	320	def _real_extract(self, url):
	321	video_id, video_type = self._match_valid_url(url).group('id', 'type')
	322	return self.url_result(HotStarIE._video_url(video_id, video_type), HotStarIE, video_id)
	323
	324
	325	class HotStarPlaylistIE(HotStarBaseIE):
	326	IE_NAME = 'hotstar:playlist'
	327	_VALID_URL = r'https?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv\|shows)(?:/[^/]+){2}/list/[^/]+/t-(?P<id>\w+)'
	328	_TESTS = [{
	329	'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
	330	'info_dict': {
	331	'id': '3_2_26',
	332	},
	333	'playlist_mincount': 20,
	334	}, {
	335	'url': 'https://www.hotstar.com/shows/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
	336	'only_matching': True,
	337	}, {
	338	'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480',
	339	'only_matching': True,
	340	}, {
	341	'url': 'https://www.hotstar.com/in/tv/karthika-deepam/15457/list/popular-clips/t-3_2_1272',
	342	'only_matching': True,
	343	}]
	344
	345	def _real_extract(self, url):
	346	id_ = self._match_id(url)
	347	return self.playlist_result(
	348	self._playlist_entries('tray/find', id_, query={'tas': 10000, 'uqId': id_}), id_)
	349
	350
	351	class HotStarSeasonIE(HotStarBaseIE):
	352	IE_NAME = 'hotstar:season'
	353	_VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv\|shows)/[^/]+/\w+)/seasons/[^/]+/ss-(?P<id>\w+)'
	354	_TESTS = [{
	355	'url': 'https://www.hotstar.com/tv/radhakrishn/1260000646/seasons/season-2/ss-8028',
	356	'info_dict': {
	357	'id': '8028',
	358	},
	359	'playlist_mincount': 35,
	360	}, {
	361	'url': 'https://www.hotstar.com/in/tv/ishqbaaz/9567/seasons/season-2/ss-4357',
	362	'info_dict': {
	363	'id': '4357',
	364	},
	365	'playlist_mincount': 30,
	366	}, {
	367	'url': 'https://www.hotstar.com/in/tv/bigg-boss/14714/seasons/season-4/ss-8208/',
	368	'info_dict': {
	369	'id': '8208',
	370	},
	371	'playlist_mincount': 19,
	372	}, {
	373	'url': 'https://www.hotstar.com/in/shows/bigg-boss/14714/seasons/season-4/ss-8208/',
	374	'only_matching': True,
	375	}]
	376
	377	def _real_extract(self, url):
	378	url, season_id = self._match_valid_url(url).groups()
	379	return self.playlist_result(self._playlist_entries(
	380	'season/asset', season_id, url, query={'tao': 0, 'tas': 0, 'size': 10000, 'id': season_id}), season_id)
	381
	382
	383	class HotStarSeriesIE(HotStarBaseIE):
	384	IE_NAME = 'hotstar:series'
	385	_VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv\|shows)/[^/]+/(?P<id>\d+))/?(?:[#?]\|$)'
	386	_TESTS = [{
	387	'url': 'https://www.hotstar.com/in/tv/radhakrishn/1260000646',
	388	'info_dict': {
	389	'id': '1260000646',
	390	},
	391	'playlist_mincount': 690,
	392	}, {
	393	'url': 'https://www.hotstar.com/tv/dancee-/1260050431',
	394	'info_dict': {
	395	'id': '1260050431',
	396	},
	397	'playlist_mincount': 43,
	398	}, {
	399	'url': 'https://www.hotstar.com/in/tv/mahabharat/435/',
	400	'info_dict': {
	401	'id': '435',
	402	},
	403	'playlist_mincount': 267,
	404	}, {
	405	'url': 'https://www.hotstar.com/in/shows/anupama/1260022017/',
	406	'info_dict': {
	407	'id': '1260022017',
	408	},
	409	'playlist_mincount': 940,
	410	}]
	411
	412	def _real_extract(self, url):
	413	url, series_id = self._match_valid_url(url).groups()
	414	id_ = self._call_api_v1(
	415	'show/detail', series_id, query={'contentId': series_id})['body']['results']['item']['id']
	416
	417	return self.playlist_result(self._playlist_entries(
	418	'tray/g/1/items', series_id, url, query={'tao': 0, 'tas': 10000, 'etid': 0, 'eid': id_}), series_id)