]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/abematv.py
[ie/abematv] Temporary fix for protocol handler
[yt-dlp.git] / yt_dlp / extractor / abematv.py
CommitLineData
import base64
import binascii
import functools
import hashlib
import hmac
import io
import json
import re
import struct
import time
import urllib.parse
import urllib.request
import urllib.response
import uuid

from .common import InfoExtractor
from ..aes import aes_ecb_decrypt
from ..utils import (
    ExtractorError,
    bytes_to_intlist,
    decode_base_n,
    int_or_none,
    intlist_to_bytes,
    OnDemandPagedList,
    time_seconds,
    traverse_obj,
    update_url_query,
)


def add_opener(ydl, handler):  # FIXME: Create proper API in .networking
    """Add a handler for opening URLs, like _download_webpage

    Installs ``handler`` on the opener obtained from the 'Urllib' request
    handler of ``ydl`` and marks the 'abematv-license' scheme as supported.
    The call is a no-op when that scheme is already listed, so it is safe to
    call more than once.

    @param ydl      object exposing ``_request_director``, ``cookiejar`` and ``proxies``
    @param handler  urllib handler instance to add to the opener
    """
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
    rh = ydl._request_director.handlers['Urllib']
    if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
        # already registered by an earlier call
        return
    opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=ydl.proxies)
    assert isinstance(opener, urllib.request.OpenerDirector)
    opener.add_handler(handler)
    # rebind instead of mutating: the existing value is extended into a new tuple
    rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')


class AbemaLicenseHandler(urllib.request.BaseHandler):
    """urllib handler answering 'abematv-license://<ticket>' URLs with the decrypted video key"""

    handler_order = 499
    # alphabet passed to decode_base_n for the 'k' field of the license response
    STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
    # hex-encoded HMAC key; unhexlified before use
    HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'

    def __init__(self, ie: 'AbemaTVBaseIE'):
        """
        @param ie  extractor used for downloading, for the media token and for _DEVICE_ID
        """
        # the protocol that this should really handle is 'abematv-license://'
        # abematv_license_open is just a placeholder for development purposes
        # ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
        # a hyphen is not valid in a method name, so the alias has to be set dynamically
        setattr(self, 'abematv-license_open', self.abematv_license_open)
        self.ie = ie

    def _get_videokey_from_ticket(self, ticket):
        """Exchange a license ticket for the video key

        @param ticket  ticket string taken from the license URL
        @returns       bytes produced by AES-ECB-decrypting the decoded 'k' value
        """
        # only print the download notes in verbose mode
        to_show = self.ie.get_param('verbose', False)
        media_token = self.ie._get_media_token(to_show=to_show)

        license_response = self.ie._download_json(
            'https://license.abema.io/abematv-hls', None, note='Requesting playback license' if to_show else False,
            query={'t': media_token},
            data=json.dumps({
                'kv': 'a',
                'lt': ticket,
            }).encode('utf-8'),
            headers={
                'Content-Type': 'application/json',
            })

        # 'k' decodes to one integer; pack it as two big-endian 64-bit words (16 bytes)
        res = decode_base_n(license_response['k'], table=self.STRTABLE)
        encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))

        # the AES key is HMAC-SHA256(HKEY, cid + device id)
        h = hmac.new(
            binascii.unhexlify(self.HKEY),
            (license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'),
            digestmod=hashlib.sha256)
        enckey = bytes_to_intlist(h.digest())

        return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))

    def abematv_license_open(self, url):
        """Serve the video key for a license URL

        @param url  URL string or urllib.request.Request; the ticket is its netloc part
        @returns    urllib.response.addinfourl with code 200 wrapping the key bytes
        """
        url = url.get_full_url() if isinstance(url, urllib.request.Request) else url
        ticket = urllib.parse.urlparse(url).netloc
        response_data = self._get_videokey_from_ticket(ticket)
        return urllib.response.addinfourl(io.BytesIO(response_data), headers={
            'Content-Length': str(len(response_data)),
        }, url=url, code=200)


class AbemaTVBaseIE(InfoExtractor):
    """Shared token handling and API helpers for the AbemaTV extractors"""

    # defined here because _get_device_token (used by every subclass) reads it as cache section
    _NETRC_MACHINE = 'abematv'

    # tokens are stored on AbemaTVBaseIE itself so that all subclasses share them
    _USERTOKEN = None
    _DEVICE_ID = None
    _MEDIATOKEN = None

    _SECRETKEY = b'v+Gjs=25Aw5erR!J8ZuvRrCx*rGswhB&qdHd_SYerEWdU&a?3DzN9BRbp5KwY4hEmcj5#fykMjJ=AuWz5GSMY-d@H7DMEh3M@9n2G552Us$$k9cD=3TxwWe86!x#Zyhe'

    @classmethod
    def _generate_aks(cls, deviceid):
        """Derive the 'applicationKeySecret' sent when registering a device

        @param deviceid  device id string
        @returns         unpadded URL-safe base64 string of the final HMAC-SHA256 digest
        """
        deviceid = deviceid.encode('utf-8')
        # add 1 hour and then drop minute and secs
        ts_1hour = int((time_seconds() // 3600 + 1) * 3600)
        time_struct = time.gmtime(ts_1hour)
        ts_1hour_str = str(ts_1hour).encode('utf-8')

        # running digest, rewritten by every mixing step
        tmp = None

        def mix_once(nonce):
            # tmp = HMAC-SHA256(_SECRETKEY, nonce)
            nonlocal tmp
            h = hmac.new(cls._SECRETKEY, digestmod=hashlib.sha256)
            h.update(nonce)
            tmp = h.digest()

        def mix_tmp(count):
            # feed the digest back into itself `count` times
            for _ in range(count):
                mix_once(tmp)

        def mix_twist(nonce):
            # mix the unpadded base64 of the digest followed by `nonce`
            mix_once(base64.urlsafe_b64encode(tmp).rstrip(b'=') + nonce)

        # the order of these steps defines the result; do not reorder
        mix_once(cls._SECRETKEY)
        mix_tmp(time_struct.tm_mon)
        mix_twist(deviceid)
        mix_tmp(time_struct.tm_mday % 5)
        mix_twist(ts_1hour_str)
        mix_tmp(time_struct.tm_hour % 5)

        return base64.urlsafe_b64encode(tmp).rstrip(b'=').decode('utf-8')

    def _get_device_token(self):
        """Return the user token, obtaining one if none is held yet

        A token cached for the configured username is tried first; if it is
        missing or rejected, a new device is registered with a random device id.

        @returns  the user token string
        """
        if self._USERTOKEN:
            return self._USERTOKEN

        # Register the license handler before any early return so that it is
        # also installed when a cached token is reused (add_opener is idempotent)
        add_opener(self._downloader, AbemaLicenseHandler(self))

        username, _ = self._get_login_info()
        AbemaTVBaseIE._USERTOKEN = username and self.cache.load(self._NETRC_MACHINE, username)
        if AbemaTVBaseIE._USERTOKEN:
            # try authentication with locally stored token
            try:
                self._get_media_token(True)
                # NOTE(review): _DEVICE_ID is still None on this path, while
                # AbemaLicenseHandler concatenates it with 'cid' - confirm whether
                # the device id needs to be cached together with the token
                return self._USERTOKEN
            except ExtractorError as e:
                self.report_warning(f'Failed to login with cached user token; obtaining a fresh one ({e})')

        AbemaTVBaseIE._DEVICE_ID = str(uuid.uuid4())
        aks = self._generate_aks(self._DEVICE_ID)
        user_data = self._download_json(
            'https://api.abema.io/v1/users', None, note='Authorizing',
            data=json.dumps({
                'deviceId': self._DEVICE_ID,
                'applicationKeySecret': aks,
            }).encode('utf-8'),
            headers={
                'Content-Type': 'application/json',
            })
        AbemaTVBaseIE._USERTOKEN = user_data['token']

        return self._USERTOKEN

    def _get_media_token(self, invalidate=False, to_show=True):
        """Return the media token, fetching it when missing or when forced

        @param invalidate  fetch a new token even if one is already held
        @param to_show     whether to print the download note
        @returns           the media token
        """
        if not invalidate and self._MEDIATOKEN:
            return self._MEDIATOKEN

        AbemaTVBaseIE._MEDIATOKEN = self._download_json(
            'https://api.abema.io/v1/media/token', None, note='Fetching media token' if to_show else False,
            query={
                'osName': 'android',
                'osVersion': '6.0.1',
                'osLang': 'ja_JP',
                'osTimezone': 'Asia/Tokyo',
                'appId': 'tv.abema',
                'appVersion': '3.27.1'
            }, headers={
                'Authorization': f'bearer {self._get_device_token()}',
            })['token']

        return self._MEDIATOKEN

    def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'):
        """Download JSON from https://api.abema.io/<endpoint> with the bearer token attached"""
        return self._download_json(
            f'https://api.abema.io/{endpoint}', video_id, query=query or {},
            note=note,
            headers={
                'Authorization': f'bearer {self._get_device_token()}',
            })

    def _extract_breadcrumb_list(self, webpage, video_id):
        """Return the item names of the first non-empty BreadcrumbList JSON-LD in the page

        @returns  list of names, or an empty list if no such block is found
        """
        for jld in re.finditer(
                r'(?is)</span></li></ul><script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
                webpage):
            jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
            if traverse_obj(jsonld, '@type') != 'BreadcrumbList':
                continue
            items = traverse_obj(jsonld, ('itemListElement', ..., 'name'))
            if items:
                return items
        return []


class AbemaTVIE(AbemaTVBaseIE):
    """Extractor for AbemaTV episodes, time-shifted slots and on-air channels"""

    _VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
    _NETRC_MACHINE = 'abematv'
    _TESTS = [{
        'url': 'https://abema.tv/video/episode/194-25_s2_p1',
        'info_dict': {
            'id': '194-25_s2_p1',
            'title': '第1話 「チーズケーキ」 「モーニング再び」',
            'series': '異世界食堂2',
            'series_number': 2,
            'episode': '第1話 「チーズケーキ」 「モーニング再び」',
            'episode_number': 1,
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/channels/anime-live2/slots/E8tvAnMJ7a9a5d',
        'info_dict': {
            'id': 'E8tvAnMJ7a9a5d',
            'title': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
            'series': 'ゆるキャン△ SEASON2',
            'episode': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
            'series_number': 2,
            'episode_number': 1,
            'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/video/episode/87-877_s1282_p31047',
        'info_dict': {
            # the extractor always reports the id taken from the URL
            'id': '87-877_s1282_p31047',
            'title': '第5話『光射す』',
            'description': 'md5:56d4fc1b4f7769ded5f923c55bb4695d',
            'thumbnail': r're:https://hayabusa\.io/.+',
            'series': '相棒',
            'episode': '第5話『光射す』',
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/now-on-air/abema-anime',
        'info_dict': {
            'id': 'abema-anime',
            # this varies
            # 'title': '女子高生の無駄づかい 全話一挙【無料ビデオ72時間】',
            'description': 'md5:55f2e61f46a17e9230802d7bcc913d5f',
            'is_live': True,
        },
        'skip': 'Not supported until yt-dlp implements native live downloader OR AbemaTV can start a local HTTP server',
    }]
    # timetable JSON, downloaded at most once per extractor instance
    _TIMETABLE = None

    def _perform_login(self, username, password):
        """Log in with the given credentials unless a cached token already works

        The '@' check decides between the e-mail and the user-id endpoint.
        On success the user token is stored in the cache under the username.
        """
        self._get_device_token()
        if self.cache.load(self._NETRC_MACHINE, username) and self._get_media_token():
            self.write_debug('Skipping logging in')
            return

        if '@' in username:  # don't strictly check if it's email address or not
            ep, method = 'user/email', 'email'
        else:
            ep, method = 'oneTimePassword', 'userId'

        login_response = self._download_json(
            f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
            data=json.dumps({
                method: username,
                'password': password
            }).encode('utf-8'), headers={
                'Authorization': f'bearer {self._get_device_token()}',
                'Origin': 'https://abema.tv',
                'Referer': 'https://abema.tv/',
                'Content-Type': 'application/json',
            })

        AbemaTVBaseIE._USERTOKEN = login_response['token']
        # the media token has to be re-issued for the new user token
        self._get_media_token(True)
        self.cache.store(self._NETRC_MACHINE, username, AbemaTVBaseIE._USERTOKEN)

    def _real_extract(self, url):
        """Build the info dict for an episode, a slot or an on-air channel

        @raises ExtractorError  if the requested on-air channel is not listed
        """
        # starting download using infojson from this extractor is undefined behavior,
        # and never be fixed in the future; you must trigger downloads by directly specifying URL.
        # (unless there's a way to hook before downloading by extractor)
        video_id, video_type = self._match_valid_url(url).group('id', 'type')
        headers = {
            'Authorization': 'Bearer ' + self._get_device_token(),
        }
        # 'video/episode' -> 'episode', 'channels/<name>/slots' -> 'slots'
        video_type = video_type.split('/')[-1]

        webpage = self._download_webpage(url, video_id)
        canonical_url = self._search_regex(
            r'<link\s+rel="canonical"\s*href="(.+?)"', webpage, 'canonical URL',
            default=url)
        info = self._search_json_ld(webpage, video_id, default={})

        # title sources, in order: page markup, thumbnail JSON-LD, timetable (live only)
        title = self._search_regex(
            r'<span\s*class=".+?EpisodeTitleBlock__title">(.+?)</span>', webpage, 'title', default=None)
        if not title:
            jsonld = None
            for jld in re.finditer(
                    r'(?is)<span\s*class="com-m-Thumbnail__image">(?:</span>)?<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
                    webpage):
                jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
                if jsonld:
                    break
            if jsonld:
                title = jsonld.get('caption')
        if not title and video_type == 'now-on-air':
            if not self._TIMETABLE:
                # cache the timetable because it goes to 5MiB in size (!!)
                self._TIMETABLE = self._download_json(
                    'https://api.abema.io/v1/timetable/dataSet?debug=false', video_id,
                    headers=headers)
            now = time_seconds(hours=9)
            for slot in self._TIMETABLE.get('slots', []):
                if slot.get('channelId') != video_id:
                    continue
                if slot['startAt'] <= now and now < slot['endAt']:
                    title = slot['title']
                    break

        # read breadcrumb on top of page
        breadcrumb = self._extract_breadcrumb_list(webpage, video_id)
        if breadcrumb:
            # breadcrumb list translates to: (e.g. 1st test for this IE)
            # Home > Anime (genre) > Isekai Shokudo 2 (series name) > Episode 1 "Cheese cakes" "Morning again" (episode title)
            # hence this works
            if len(breadcrumb) >= 2:
                # a single-item list has no series entry in front of the episode
                info['series'] = breadcrumb[-2]
            info['episode'] = breadcrumb[-1]
            if not title:
                title = info['episode']

        description = self._html_search_regex(
            (r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
             r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',),
            webpage, 'description', default=None, group=1)
        if not description:
            og_desc = self._html_search_meta(
                ('description', 'og:description', 'twitter:description'), webpage)
            if og_desc:
                # The trailing `$` is required: without it the lazy group matches
                # a single character and the boilerplate suffix is never removed
                description = re.sub(r'''(?sx)
                    ^(.+?)(?:
                        アニメの動画を無料で見るならABEMA!| # anime
                        等、.+ # applies for most of categories
                    )?$
                ''', r'\1', og_desc)

        # canonical URL may contain series and episode number
        mobj = re.search(r's(\d+)_p(\d+)$', canonical_url)
        if mobj:
            seri = int_or_none(mobj.group(1), default=float('inf'))
            epis = int_or_none(mobj.group(2), default=float('inf'))
            info['series_number'] = seri if seri < 100 else None
            # some anime like Detective Conan (though not available in AbemaTV)
            # has more than 1000 episodes (1026 as of 2021/11/15)
            info['episode_number'] = epis if epis < 2000 else None

        is_live, m3u8_url = False, None
        if video_type == 'now-on-air':
            is_live = True
            channel_url = 'https://api.abema.io/v1/channels'
            if video_id == 'news-global':
                channel_url = update_url_query(channel_url, {'division': '1'})
            onair_channels = self._download_json(channel_url, video_id)
            for ch in onair_channels['channels']:
                if video_id == ch['id']:
                    m3u8_url = ch['playback']['hls']
                    break
            else:
                # loop finished without finding the channel
                raise ExtractorError(f'Cannot find on-air {video_id} channel.', expected=True)
        elif video_type == 'episode':
            api_response = self._download_json(
                f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
                note='Checking playability',
                headers=headers)
            ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'))
            if 3 not in ondemand_types:
                # cannot acquire decryption key for these streams
                self.report_warning('This is a premium-only stream')
            info.update(traverse_obj(api_response, {
                'series': ('series', 'title'),
                'season': ('season', 'title'),
                'season_number': ('season', 'sequence'),
                'episode_number': ('episode', 'number'),
            }))
            if not title:
                title = traverse_obj(api_response, ('episode', 'title'))
            if not description:
                description = traverse_obj(api_response, ('episode', 'content'))

            m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
        elif video_type == 'slots':
            api_response = self._download_json(
                f'https://api.abema.io/v1/media/slots/{video_id}', video_id,
                note='Checking playability',
                headers=headers)
            if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
                self.report_warning('This is a premium-only stream')

            m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
        else:
            # _VALID_URL only admits the three types handled above
            raise ExtractorError('Unreachable')

        if is_live:
            self.report_warning("This is a livestream; yt-dlp doesn't support downloading natively, but FFmpeg cannot handle m3u8 manifests from AbemaTV")
            self.report_warning('Please consider using Streamlink to download these streams (https://github.com/streamlink/streamlink)')
        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, ext='mp4', live=is_live)

        info.update({
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'is_live': is_live,
        })
        return info


class AbemaTVTitleIE(AbemaTVBaseIE):
    """Playlist extractor for AbemaTV series pages (abema.tv/video/title/<id>)"""

    _VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'
    # number of programs requested per API page
    _PAGE_SIZE = 25

    _TESTS = [{
        'url': 'https://abema.tv/video/title/90-1597',
        'info_dict': {
            'id': '90-1597',
            'title': 'シャッフルアイランド',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://abema.tv/video/title/193-132',
        'info_dict': {
            'id': '193-132',
            'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
        },
        'playlist_mincount': 16,
    }, {
        'url': 'https://abema.tv/video/title/25-102',
        'info_dict': {
            'id': '25-102',
            'title': 'ソードアート・オンライン アリシゼーション',
        },
        'playlist_mincount': 24,
    }]

    def _fetch_page(self, playlist_id, series_version, page):
        """Yield url_result entries for one page of the series' programs

        @param playlist_id     series id
        @param series_version  'version' value of the series metadata
        @param page            zero-based page index
        """
        programs = self._call_api(
            f'v1/video/series/{playlist_id}/programs', playlist_id,
            note=f'Downloading page {page + 1}',
            query={
                'seriesVersion': series_version,
                'offset': str(page * self._PAGE_SIZE),
                'order': 'seq',
                'limit': str(self._PAGE_SIZE),
            })
        yield from (
            self.url_result(f'https://abema.tv/video/episode/{x}')
            for x in traverse_obj(programs, ('programs', ..., 'id')))

    def _entries(self, playlist_id, series_version):
        """Return an OnDemandPagedList that requests pages through _fetch_page"""
        return OnDemandPagedList(
            functools.partial(self._fetch_page, playlist_id, series_version),
            self._PAGE_SIZE)

    def _real_extract(self, url):
        """Return the playlist result for the series addressed by the URL"""
        playlist_id = self._match_id(url)
        series_info = self._call_api(f'v1/video/series/{playlist_id}', playlist_id)

        return self.playlist_result(
            self._entries(playlist_id, series_info['version']), playlist_id=playlist_id,
            playlist_title=series_info.get('title'),
            playlist_description=series_info.get('content'))