jfr.im git - yt-dlp.git/blame_incremental - yt

... / ...

Commit	Line	Data
	1	import base64
	2	import binascii
	3	import functools
	4	import hashlib
	5	import hmac
	6	import io
	7	import json
	8	import re
	9	import struct
	10	import time
	11	import urllib.parse
	12	import urllib.request
	13	import urllib.response
	14	import uuid
	15	from ..utils.networking import clean_proxies
	16	from .common import InfoExtractor
	17	from ..aes import aes_ecb_decrypt
	18	from ..utils import (
	19	ExtractorError,
	20	bytes_to_intlist,
	21	decode_base_n,
	22	int_or_none,
	23	intlist_to_bytes,
	24	OnDemandPagedList,
	25	time_seconds,
	26	traverse_obj,
	27	update_url_query,
	28	)
	29
	30
	31	def add_opener(ydl, handler): # FIXME: Create proper API in .networking
	32	"""Add a handler for opening URLs, like _download_webpage"""
	33	# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
	34	# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
	35	rh = ydl._request_director.handlers['Urllib']
	36	if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
	37	return
	38	headers = ydl.params['http_headers'].copy()
	39	proxies = ydl.proxies.copy()
	40	clean_proxies(proxies, headers)
	41	opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=proxies)
	42	assert isinstance(opener, urllib.request.OpenerDirector)
	43	opener.add_handler(handler)
	44	rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')
	45
	46
	47	class AbemaLicenseHandler(urllib.request.BaseHandler):
	48	handler_order = 499
	49	STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
	50	HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
	51
	52	def __init__(self, ie: 'AbemaTVIE'):
	53	# the protocol that this should really handle is 'abematv-license://'
	54	# abematv_license_open is just a placeholder for development purposes
	55	# ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
	56	setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open'))
	57	self.ie = ie
	58
	59	def _get_videokey_from_ticket(self, ticket):
	60	to_show = self.ie.get_param('verbose', False)
	61	media_token = self.ie._get_media_token(to_show=to_show)
	62
	63	license_response = self.ie._download_json(
	64	'https://license.abema.io/abematv-hls', None, note='Requesting playback license' if to_show else False,
	65	query={'t': media_token},
	66	data=json.dumps({
	67	'kv': 'a',
	68	'lt': ticket
	69	}).encode('utf-8'),
	70	headers={
	71	'Content-Type': 'application/json',
	72	})
	73
	74	res = decode_base_n(license_response['k'], table=self.STRTABLE)
	75	encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
	76
	77	h = hmac.new(
	78	binascii.unhexlify(self.HKEY),
	79	(license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'),
	80	digestmod=hashlib.sha256)
	81	enckey = bytes_to_intlist(h.digest())
	82
	83	return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
	84
	85	def abematv_license_open(self, url):
	86	url = url.get_full_url() if isinstance(url, urllib.request.Request) else url
	87	ticket = urllib.parse.urlparse(url).netloc
	88	response_data = self._get_videokey_from_ticket(ticket)
	89	return urllib.response.addinfourl(io.BytesIO(response_data), headers={
	90	'Content-Length': str(len(response_data)),
	91	}, url=url, code=200)
	92
	93
	94	class AbemaTVBaseIE(InfoExtractor):
	95	_NETRC_MACHINE = 'abematv'
	96
	97	_USERTOKEN = None
	98	_DEVICE_ID = None
	99	_MEDIATOKEN = None
	100
	101	_SECRETKEY = b'v+Gjs=25Aw5erR!J8ZuvRrCx*rGswhB&qdHd_SYerEWdU&a?3DzN9BRbp5KwY4hEmcj5#fykMjJ=AuWz5GSMY-d@H7DMEh3M@9n2G552Us$$k9cD=3TxwWe86!x#Zyhe'
	102
	103	@classmethod
	104	def _generate_aks(cls, deviceid):
	105	deviceid = deviceid.encode('utf-8')
	106	# add 1 hour and then drop minute and secs
	107	ts_1hour = int((time_seconds() // 3600 + 1) * 3600)
	108	time_struct = time.gmtime(ts_1hour)
	109	ts_1hour_str = str(ts_1hour).encode('utf-8')
	110
	111	tmp = None
	112
	113	def mix_once(nonce):
	114	nonlocal tmp
	115	h = hmac.new(cls._SECRETKEY, digestmod=hashlib.sha256)
	116	h.update(nonce)
	117	tmp = h.digest()
	118
	119	def mix_tmp(count):
	120	nonlocal tmp
	121	for i in range(count):
	122	mix_once(tmp)
	123
	124	def mix_twist(nonce):
	125	nonlocal tmp
	126	mix_once(base64.urlsafe_b64encode(tmp).rstrip(b'=') + nonce)
	127
	128	mix_once(cls._SECRETKEY)
	129	mix_tmp(time_struct.tm_mon)
	130	mix_twist(deviceid)
	131	mix_tmp(time_struct.tm_mday % 5)
	132	mix_twist(ts_1hour_str)
	133	mix_tmp(time_struct.tm_hour % 5)
	134
	135	return base64.urlsafe_b64encode(tmp).rstrip(b'=').decode('utf-8')
	136
	137	def _get_device_token(self):
	138	if self._USERTOKEN:
	139	return self._USERTOKEN
	140
	141	add_opener(self._downloader, AbemaLicenseHandler(self))
	142
	143	username, _ = self._get_login_info()
	144	auth_cache = username and self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19')
	145	AbemaTVBaseIE._USERTOKEN = auth_cache and auth_cache.get('usertoken')
	146	if AbemaTVBaseIE._USERTOKEN:
	147	# try authentication with locally stored token
	148	try:
	149	AbemaTVBaseIE._DEVICE_ID = auth_cache.get('device_id')
	150	self._get_media_token(True)
	151	return
	152	except ExtractorError as e:
	153	self.report_warning(f'Failed to login with cached user token; obtaining a fresh one ({e})')
	154
	155	AbemaTVBaseIE._DEVICE_ID = str(uuid.uuid4())
	156	aks = self._generate_aks(self._DEVICE_ID)
	157	user_data = self._download_json(
	158	'https://api.abema.io/v1/users', None, note='Authorizing',
	159	data=json.dumps({
	160	'deviceId': self._DEVICE_ID,
	161	'applicationKeySecret': aks,
	162	}).encode('utf-8'),
	163	headers={
	164	'Content-Type': 'application/json',
	165	})
	166	AbemaTVBaseIE._USERTOKEN = user_data['token']
	167
	168	return self._USERTOKEN
	169
	170	def _get_media_token(self, invalidate=False, to_show=True):
	171	if not invalidate and self._MEDIATOKEN:
	172	return self._MEDIATOKEN
	173
	174	AbemaTVBaseIE._MEDIATOKEN = self._download_json(
	175	'https://api.abema.io/v1/media/token', None, note='Fetching media token' if to_show else False,
	176	query={
	177	'osName': 'android',
	178	'osVersion': '6.0.1',
	179	'osLang': 'ja_JP',
	180	'osTimezone': 'Asia/Tokyo',
	181	'appId': 'tv.abema',
	182	'appVersion': '3.27.1'
	183	}, headers={
	184	'Authorization': f'bearer {self._get_device_token()}',
	185	})['token']
	186
	187	return self._MEDIATOKEN
	188
	189	def _perform_login(self, username, password):
	190	self._get_device_token()
	191	if self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19') and self._get_media_token():
	192	self.write_debug('Skipping logging in')
	193	return
	194
	195	if '@' in username: # don't strictly check if it's email address or not
	196	ep, method = 'user/email', 'email'
	197	else:
	198	ep, method = 'oneTimePassword', 'userId'
	199
	200	login_response = self._download_json(
	201	f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
	202	data=json.dumps({
	203	method: username,
	204	'password': password
	205	}).encode('utf-8'), headers={
	206	'Authorization': f'bearer {self._get_device_token()}',
	207	'Origin': 'https://abema.tv',
	208	'Referer': 'https://abema.tv/',
	209	'Content-Type': 'application/json',
	210	})
	211
	212	AbemaTVBaseIE._USERTOKEN = login_response['token']
	213	self._get_media_token(True)
	214	auth_cache = {
	215	'device_id': AbemaTVBaseIE._DEVICE_ID,
	216	'usertoken': AbemaTVBaseIE._USERTOKEN,
	217	}
	218	self.cache.store(self._NETRC_MACHINE, username, auth_cache)
	219
	220	def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'):
	221	return self._download_json(
	222	f'https://api.abema.io/{endpoint}', video_id, query=query or {},
	223	note=note,
	224	headers={
	225	'Authorization': f'bearer {self._get_device_token()}',
	226	})
	227
	228	def _extract_breadcrumb_list(self, webpage, video_id):
	229	for jld in re.finditer(
	230	r'(?is)</span></li></ul><script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
	231	webpage):
	232	jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
	233	if traverse_obj(jsonld, '@type') != 'BreadcrumbList':
	234	continue
	235	items = traverse_obj(jsonld, ('itemListElement', ..., 'name'))
	236	if items:
	237	return items
	238	return []
	239
	240
	241	class AbemaTVIE(AbemaTVBaseIE):
	242	_VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air\|video/episode\|channels/.+?/slots)/(?P<id>[^?/]+)'
	243	_TESTS = [{
	244	'url': 'https://abema.tv/video/episode/194-25_s2_p1',
	245	'info_dict': {
	246	'id': '194-25_s2_p1',
	247	'title': '第1話「チーズケーキ」　「モーニング再び」',
	248	'series': '異世界食堂２',
	249	'season': 'シーズン2',
	250	'season_number': 2,
	251	'episode': '第1話「チーズケーキ」　「モーニング再び」',
	252	'episode_number': 1,
	253	},
	254	'skip': 'expired',
	255	}, {
	256	'url': 'https://abema.tv/channels/anime-live2/slots/E8tvAnMJ7a9a5d',
	257	'info_dict': {
	258	'id': 'E8tvAnMJ7a9a5d',
	259	'title': 'ゆるキャン△ SEASON２全話一挙【無料ビデオ72時間】',
	260	'series': 'ゆるキャン△ SEASON２',
	261	'episode': 'ゆるキャン△ SEASON２全話一挙【無料ビデオ72時間】',
	262	'series_number': 2,
	263	'episode_number': 1,
	264	'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
	265	},
	266	'skip': 'expired',
	267	}, {
	268	'url': 'https://abema.tv/video/episode/87-877_s1282_p31047',
	269	'info_dict': {
	270	'id': 'E8tvAnMJ7a9a5d',
	271	'title': '第5話『光射す』',
	272	'description': 'md5:56d4fc1b4f7769ded5f923c55bb4695d',
	273	'thumbnail': r're:https://hayabusa\.io/.+',
	274	'series': '相棒',
	275	'episode': '第5話『光射す』',
	276	},
	277	'skip': 'expired',
	278	}, {
	279	'url': 'https://abema.tv/now-on-air/abema-anime',
	280	'info_dict': {
	281	'id': 'abema-anime',
	282	# this varies
	283	# 'title': '女子高生の無駄づかい全話一挙【無料ビデオ72時間】',
	284	'description': 'md5:55f2e61f46a17e9230802d7bcc913d5f',
	285	'is_live': True,
	286	},
	287	'skip': 'Not supported until yt-dlp implements native live downloader OR AbemaTV can start a local HTTP server',
	288	}]
	289	_TIMETABLE = None
	290
	291	def _real_extract(self, url):
	292	# starting download using infojson from this extractor is undefined behavior,
	293	# and never be fixed in the future; you must trigger downloads by directly specifying URL.
	294	# (unless there's a way to hook before downloading by extractor)
	295	video_id, video_type = self._match_valid_url(url).group('id', 'type')
	296	headers = {
	297	'Authorization': 'Bearer ' + self._get_device_token(),
	298	}
	299	video_type = video_type.split('/')[-1]
	300
	301	webpage = self._download_webpage(url, video_id)
	302	canonical_url = self._search_regex(
	303	r'<link\s+rel="canonical"\s*href="(.+?)"', webpage, 'canonical URL',
	304	default=url)
	305	info = self._search_json_ld(webpage, video_id, default={})
	306
	307	title = self._search_regex(
	308	r'<span\s*class=".+?EpisodeTitleBlock__title">(.+?)</span>', webpage, 'title', default=None)
	309	if not title:
	310	jsonld = None
	311	for jld in re.finditer(
	312	r'(?is)<span\sclass="com-m-Thumbnail__image">(?:</span>)?<script[^>]+type=(["\']?)application/ld\+json\1[^>]>(?P<json_ld>.+?)</script>',
	313	webpage):
	314	jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
	315	if jsonld:
	316	break
	317	if jsonld:
	318	title = jsonld.get('caption')
	319	if not title and video_type == 'now-on-air':
	320	if not self._TIMETABLE:
	321	# cache the timetable because it goes to 5MiB in size (!!)
	322	self._TIMETABLE = self._download_json(
	323	'https://api.abema.io/v1/timetable/dataSet?debug=false', video_id,
	324	headers=headers)
	325	now = time_seconds(hours=9)
	326	for slot in self._TIMETABLE.get('slots', []):
	327	if slot.get('channelId') != video_id:
	328	continue
	329	if slot['startAt'] <= now and now < slot['endAt']:
	330	title = slot['title']
	331	break
	332
	333	# read breadcrumb on top of page
	334	breadcrumb = self._extract_breadcrumb_list(webpage, video_id)
	335	if breadcrumb:
	336	# breadcrumb list translates to: (e.g. 1st test for this IE)
	337	# Home > Anime (genre) > Isekai Shokudo 2 (series name) > Episode 1 "Cheese cakes" "Morning again" (episode title)
	338	# hence this works
	339	info['series'] = breadcrumb[-2]
	340	info['episode'] = breadcrumb[-1]
	341	if not title:
	342	title = info['episode']
	343
	344	description = self._html_search_regex(
	345	(r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
	346	r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',),
	347	webpage, 'description', default=None, group=1)
	348	if not description:
	349	og_desc = self._html_search_meta(
	350	('description', 'og:description', 'twitter:description'), webpage)
	351	if og_desc:
	352	description = re.sub(r'''(?sx)
	353	^(.+?)(?:
	354	アニメの動画を無料で見るならABEMA！\| # anime
	355	等、.+ # applies for most of categories
	356	)?
	357	''', r'\1', og_desc)
	358
	359	# canonical URL may contain season and episode number
	360	mobj = re.search(r's(\d+)_p(\d+)$', canonical_url)
	361	if mobj:
	362	seri = int_or_none(mobj.group(1), default=float('inf'))
	363	epis = int_or_none(mobj.group(2), default=float('inf'))
	364	info['season_number'] = seri if seri < 100 else None
	365	# some anime like Detective Conan (though not available in AbemaTV)
	366	# has more than 1000 episodes (1026 as of 2021/11/15)
	367	info['episode_number'] = epis if epis < 2000 else None
	368
	369	is_live, m3u8_url = False, None
	370	if video_type == 'now-on-air':
	371	is_live = True
	372	channel_url = 'https://api.abema.io/v1/channels'
	373	if video_id == 'news-global':
	374	channel_url = update_url_query(channel_url, {'division': '1'})
	375	onair_channels = self._download_json(channel_url, video_id)
	376	for ch in onair_channels['channels']:
	377	if video_id == ch['id']:
	378	m3u8_url = ch['playback']['hls']
	379	break
	380	else:
	381	raise ExtractorError(f'Cannot find on-air {video_id} channel.', expected=True)
	382	elif video_type == 'episode':
	383	api_response = self._download_json(
	384	f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
	385	note='Checking playability',
	386	headers=headers)
	387	ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'))
	388	if 3 not in ondemand_types:
	389	# cannot acquire decryption key for these streams
	390	self.report_warning('This is a premium-only stream')
	391	info.update(traverse_obj(api_response, {
	392	'series': ('series', 'title'),
	393	'season': ('season', 'name'),
	394	'season_number': ('season', 'sequence'),
	395	'episode_number': ('episode', 'number'),
	396	}))
	397	if not title:
	398	title = traverse_obj(api_response, ('episode', 'title'))
	399	if not description:
	400	description = traverse_obj(api_response, ('episode', 'content'))
	401
	402	m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
	403	elif video_type == 'slots':
	404	api_response = self._download_json(
	405	f'https://api.abema.io/v1/media/slots/{video_id}', video_id,
	406	note='Checking playability',
	407	headers=headers)
	408	if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
	409	self.report_warning('This is a premium-only stream')
	410
	411	m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
	412	else:
	413	raise ExtractorError('Unreachable')
	414
	415	if is_live:
	416	self.report_warning("This is a livestream; yt-dlp doesn't support downloading natively, but FFmpeg cannot handle m3u8 manifests from AbemaTV")
	417	self.report_warning('Please consider using Streamlink to download these streams (https://github.com/streamlink/streamlink)')
	418	formats = self._extract_m3u8_formats(
	419	m3u8_url, video_id, ext='mp4', live=is_live)
	420
	421	info.update({
	422	'id': video_id,
	423	'title': title,
	424	'description': description,
	425	'formats': formats,
	426	'is_live': is_live,
	427	})
	428	return info
	429
	430
	431	class AbemaTVTitleIE(AbemaTVBaseIE):
	432	_VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'
	433	_PAGE_SIZE = 25
	434
	435	_TESTS = [{
	436	'url': 'https://abema.tv/video/title/90-1597',
	437	'info_dict': {
	438	'id': '90-1597',
	439	'title': 'シャッフルアイランド',
	440	},
	441	'playlist_mincount': 2,
	442	}, {
	443	'url': 'https://abema.tv/video/title/193-132',
	444	'info_dict': {
	445	'id': '193-132',
	446	'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
	447	},
	448	'playlist_mincount': 16,
	449	}, {
	450	'url': 'https://abema.tv/video/title/25-102',
	451	'info_dict': {
	452	'id': '25-102',
	453	'title': 'ソードアート・オンラインアリシゼーション',
	454	},
	455	'playlist_mincount': 24,
	456	}]
	457
	458	def _fetch_page(self, playlist_id, series_version, page):
	459	programs = self._call_api(
	460	f'v1/video/series/{playlist_id}/programs', playlist_id,
	461	note=f'Downloading page {page + 1}',
	462	query={
	463	'seriesVersion': series_version,
	464	'offset': str(page * self._PAGE_SIZE),
	465	'order': 'seq',
	466	'limit': str(self._PAGE_SIZE),
	467	})
	468	yield from (
	469	self.url_result(f'https://abema.tv/video/episode/{x}')
	470	for x in traverse_obj(programs, ('programs', ..., 'id')))
	471
	472	def _entries(self, playlist_id, series_version):
	473	return OnDemandPagedList(
	474	functools.partial(self._fetch_page, playlist_id, series_version),
	475	self._PAGE_SIZE)
	476
	477	def _real_extract(self, url):
	478	playlist_id = self._match_id(url)
	479	series_info = self._call_api(f'v1/video/series/{playlist_id}', playlist_id)
	480
	481	return self.playlist_result(
	482	self._entries(playlist_id, series_info['version']), playlist_id=playlist_id,
	483	playlist_title=series_info.get('title'),
	484	playlist_description=series_info.get('content'))