]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/abematv.py
[rh:websockets] Migrate websockets to networking framework (#7720)
[yt-dlp.git] / yt_dlp / extractor / abematv.py
CommitLineData
f8271158 1import base64
2import binascii
bc83b4b0 3import functools
3e9b66d7
LNO
4import hashlib
5import hmac
f8271158 6import io
7import json
3e9b66d7
LNO
8import re
9import struct
f8271158 10import time
14f25df2 11import urllib.parse
ac668111 12import urllib.request
f9934b96 13import urllib.response
14import uuid
497bbbbd 15from ..utils.networking import clean_proxies
3e9b66d7
LNO
16from .common import InfoExtractor
17from ..aes import aes_ecb_decrypt
3e9b66d7
LNO
18from ..utils import (
19 ExtractorError,
f8271158 20 bytes_to_intlist,
7b2c3f47 21 decode_base_n,
3e9b66d7 22 int_or_none,
f8271158 23 intlist_to_bytes,
bc83b4b0 24 OnDemandPagedList,
3e9b66d7 25 time_seconds,
3e9b66d7 26 traverse_obj,
f8271158 27 update_url_query,
3e9b66d7
LNO
28)
29
3e9b66d7 30
def add_opener(ydl, handler):  # FIXME: Create proper API in .networking
    """Add a handler for opening URLs, like _download_webpage

    Attaches `handler` to the urllib ``OpenerDirector`` obtained from the
    'Urllib' request handler of `ydl`, and marks the 'abematv-license' scheme
    as supported on that request handler. Does nothing when the scheme is
    already listed, so calling this repeatedly is safe.
    """
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
    license_scheme = 'abematv-license'
    urllib_rh = ydl._request_director.handlers['Urllib']
    if license_scheme in urllib_rh._SUPPORTED_URL_SCHEMES:
        # Already registered by an earlier call
        return

    # Work on copies so the global headers/proxies of `ydl` are not touched
    request_headers = ydl.params['http_headers'].copy()
    proxy_map = ydl.proxies.copy()
    clean_proxies(proxy_map, request_headers)

    # NOTE(review): presumably this has to use the same arguments as regular
    # requests so that the handler lands on the opener they use - confirm
    director = urllib_rh._get_instance(cookiejar=ydl.cookiejar, proxies=proxy_map)
    assert isinstance(director, urllib.request.OpenerDirector)
    director.add_handler(handler)
    urllib_rh._SUPPORTED_URL_SCHEMES = (*urllib_rh._SUPPORTED_URL_SCHEMES, license_scheme)
3e9b66d7
LNO
45
46
class AbemaLicenseHandler(urllib.request.BaseHandler):
    """urllib handler that answers 'abematv-license://<ticket>' requests.

    The response body is the decrypted video key for the ticket found in the
    network-location part of the URL.
    """
    handler_order = 499
    # Alphabet passed to decode_base_n() when decoding the license response
    STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
    # Hex-encoded HMAC key used to derive the key that decrypts the video key
    HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'

    def __init__(self, ie: 'AbemaTVIE'):
        # The protocol that this should really handle is 'abematv-license://',
        # but a method name cannot contain a hyphen; abematv_license_open is
        # therefore aliased under the name urllib looks up for that scheme.
        # ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
        setattr(self, 'abematv-license_open', self.abematv_license_open)
        self.ie = ie

    def _get_videokey_from_ticket(self, ticket):
        """Request the license for `ticket` and return the decrypted video key as bytes"""
        verbose = self.ie.get_param('verbose', False)
        media_token = self.ie._get_media_token(to_show=verbose)

        request_body = json.dumps({
            'kv': 'a',
            'lt': ticket
        }).encode('utf-8')
        license_response = self.ie._download_json(
            'https://license.abema.io/abematv-hls', None,
            note='Requesting playback license' if verbose else False,
            query={'t': media_token},
            data=request_body,
            headers={
                'Content-Type': 'application/json',
            })

        # 'k' decodes to one integer; pack it as two big-endian 64-bit halves
        packed_key = decode_base_n(license_response['k'], table=self.STRTABLE)
        high_half, low_half = packed_key >> 64, packed_key & 0xffffffffffffffff
        encvideokey = bytes_to_intlist(struct.pack('>QQ', high_half, low_half))

        # The AES key is HMAC-SHA256(HKEY, cid + device id)
        mac = hmac.new(
            binascii.unhexlify(self.HKEY),
            (license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'),
            digestmod=hashlib.sha256)
        enckey = bytes_to_intlist(mac.digest())

        return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))

    def abematv_license_open(self, url):
        """Serve a license URL (string or urllib Request) with the video key as body"""
        if isinstance(url, urllib.request.Request):
            url = url.get_full_url()
        # The ticket is carried where a host name would normally be
        ticket = urllib.parse.urlparse(url).netloc
        video_key = self._get_videokey_from_ticket(ticket)
        response_headers = {
            'Content-Length': str(len(video_key)),
        }
        return urllib.response.addinfourl(
            io.BytesIO(video_key), headers=response_headers, url=url, code=200)
92
93
class AbemaTVBaseIE(InfoExtractor):
    """Token handling and API helpers shared by the AbemaTV extractors.

    The user token, device ID and media token are stored on AbemaTVBaseIE
    itself, so they are shared by all instances of all subclasses.
    """
    # Name used for the login info lookup and as cache section. It is declared
    # here because _get_device_token() reads it for every subclass.
    _NETRC_MACHINE = 'abematv'

    _USERTOKEN = None
    _DEVICE_ID = None
    _MEDIATOKEN = None

    _SECRETKEY = b'v+Gjs=25Aw5erR!J8ZuvRrCx*rGswhB&qdHd_SYerEWdU&a?3DzN9BRbp5KwY4hEmcj5#fykMjJ=AuWz5GSMY-d@H7DMEh3M@9n2G552Us$$k9cD=3TxwWe86!x#Zyhe'

    @classmethod
    def _generate_aks(cls, deviceid):
        """Derive the 'applicationKeySecret' for `deviceid`.

        The value is a chain of HMAC-SHA256 rounds keyed with _SECRETKEY whose
        number of rounds depends on the month, day and hour (UTC) of the next
        full hour. Returns unpadded URL-safe base64 text.
        """
        deviceid = deviceid.encode('utf-8')
        # add 1 hour and then drop minute and secs
        ts_1hour = int((time_seconds() // 3600 + 1) * 3600)
        time_struct = time.gmtime(ts_1hour)
        ts_1hour_str = str(ts_1hour).encode('utf-8')

        def mix_once(nonce):
            # One HMAC-SHA256 round over `nonce`
            return hmac.new(cls._SECRETKEY, nonce, digestmod=hashlib.sha256).digest()

        def mix_tmp(state, count):
            # Feed the state back into itself `count` times
            for _ in range(count):
                state = mix_once(state)
            return state

        def mix_twist(state, nonce):
            # Mix in extra data: unpadded base64 of the state followed by `nonce`
            return mix_once(base64.urlsafe_b64encode(state).rstrip(b'=') + nonce)

        state = mix_once(cls._SECRETKEY)
        state = mix_tmp(state, time_struct.tm_mon)
        state = mix_twist(state, deviceid)
        state = mix_tmp(state, time_struct.tm_mday % 5)
        state = mix_twist(state, ts_1hour_str)
        state = mix_tmp(state, time_struct.tm_hour % 5)

        return base64.urlsafe_b64encode(state).rstrip(b'=').decode('utf-8')

    def _get_device_token(self):
        """Return the user token, obtaining it first if necessary.

        A token cached for the configured username is tried first; when there
        is none, or it is rejected, a new device is registered instead.
        """
        if self._USERTOKEN:
            return self._USERTOKEN

        # Register the license handler before either branch below can return,
        # so that it is also available when the cached token is used.
        # add_opener() ignores repeated registrations.
        add_opener(self._downloader, AbemaLicenseHandler(self))

        username, _ = self._get_login_info()
        AbemaTVBaseIE._USERTOKEN = username and self.cache.load(self._NETRC_MACHINE, username)
        if AbemaTVBaseIE._USERTOKEN:
            # try authentication with locally stored token
            # NOTE(review): _DEVICE_ID stays None on this path although
            # AbemaLicenseHandler concatenates it with a str - confirm whether
            # the device ID needs to be cached together with the token
            try:
                self._get_media_token(True)
            except ExtractorError as e:
                self.report_warning(f'Failed to login with cached user token; obtaining a fresh one ({e})')
            else:
                # Callers embed the return value in the Authorization header
                return self._USERTOKEN

        AbemaTVBaseIE._DEVICE_ID = str(uuid.uuid4())
        aks = self._generate_aks(self._DEVICE_ID)
        user_data = self._download_json(
            'https://api.abema.io/v1/users', None, note='Authorizing',
            data=json.dumps({
                'deviceId': self._DEVICE_ID,
                'applicationKeySecret': aks,
            }).encode('utf-8'),
            headers={
                'Content-Type': 'application/json',
            })
        AbemaTVBaseIE._USERTOKEN = user_data['token']

        return self._USERTOKEN

    def _get_media_token(self, invalidate=False, to_show=True):
        """Return the media token, fetching a new one if none is stored or `invalidate` is set.

        `to_show` controls whether the download note is printed.
        """
        if not invalidate and self._MEDIATOKEN:
            return self._MEDIATOKEN

        AbemaTVBaseIE._MEDIATOKEN = self._download_json(
            'https://api.abema.io/v1/media/token', None, note='Fetching media token' if to_show else False,
            query={
                'osName': 'android',
                'osVersion': '6.0.1',
                'osLang': 'ja_JP',
                'osTimezone': 'Asia/Tokyo',
                'appId': 'tv.abema',
                'appVersion': '3.27.1'
            }, headers={
                'Authorization': f'bearer {self._get_device_token()}',
            })['token']

        return self._MEDIATOKEN

    def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'):
        """Download JSON from `endpoint` of api.abema.io, authorized with the user token"""
        return self._download_json(
            f'https://api.abema.io/{endpoint}', video_id, query=query or {},
            note=note,
            headers={
                'Authorization': f'bearer {self._get_device_token()}',
            })

    def _extract_breadcrumb_list(self, webpage, video_id):
        """Return the item names of the first non-empty BreadcrumbList JSON-LD block, or []"""
        for jld in re.finditer(
                r'(?is)</span></li></ul><script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
                webpage):
            jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
            if traverse_obj(jsonld, '@type') != 'BreadcrumbList':
                continue
            items = traverse_obj(jsonld, ('itemListElement', ..., 'name'))
            if items:
                return items
        return []
203
204
class AbemaTVIE(AbemaTVBaseIE):
    """Extractor for AbemaTV episodes, time-shifted slots and live channels."""
    _VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
    _NETRC_MACHINE = 'abematv'
    _TESTS = [{
        'url': 'https://abema.tv/video/episode/194-25_s2_p1',
        'info_dict': {
            'id': '194-25_s2_p1',
            'title': '第1話 「チーズケーキ」 「モーニング再び」',
            'series': '異世界食堂2',
            'series_number': 2,
            'episode': '第1話 「チーズケーキ」 「モーニング再び」',
            'episode_number': 1,
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/channels/anime-live2/slots/E8tvAnMJ7a9a5d',
        'info_dict': {
            'id': 'E8tvAnMJ7a9a5d',
            'title': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
            'series': 'ゆるキャン△ SEASON2',
            'episode': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
            'series_number': 2,
            'episode_number': 1,
            'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/video/episode/87-877_s1282_p31047',
        'info_dict': {
            # the ID is always the last path component of the URL
            'id': '87-877_s1282_p31047',
            'title': '第5話『光射す』',
            'description': 'md5:56d4fc1b4f7769ded5f923c55bb4695d',
            'thumbnail': r're:https://hayabusa\.io/.+',
            'series': '相棒',
            'episode': '第5話『光射す』',
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/now-on-air/abema-anime',
        'info_dict': {
            'id': 'abema-anime',
            # this varies
            # 'title': '女子高生の無駄づかい 全話一挙【無料ビデオ72時間】',
            'description': 'md5:55f2e61f46a17e9230802d7bcc913d5f',
            'is_live': True,
        },
        'skip': 'Not supported until yt-dlp implements native live downloader OR AbemaTV can start a local HTTP server',
    }]
    # Timetable response, kept after the first download
    _TIMETABLE = None

    def _perform_login(self, username, password):
        """Log in with `username`/`password` unless a cached user token already works.

        On success the new user token is stored in the cache under `username`.
        """
        self._get_device_token()
        if self.cache.load(self._NETRC_MACHINE, username) and self._get_media_token():
            self.write_debug('Skipping logging in')
            return

        if '@' in username:  # don't strictly check if it's email address or not
            ep, method = 'user/email', 'email'
        else:
            ep, method = 'oneTimePassword', 'userId'

        login_response = self._download_json(
            f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
            data=json.dumps({
                method: username,
                'password': password
            }).encode('utf-8'), headers={
                'Authorization': f'bearer {self._get_device_token()}',
                'Origin': 'https://abema.tv',
                'Referer': 'https://abema.tv/',
                'Content-Type': 'application/json',
            })

        AbemaTVBaseIE._USERTOKEN = login_response['token']
        # the media token obtained so far belongs to the previous user token
        self._get_media_token(True)
        self.cache.store(self._NETRC_MACHINE, username, AbemaTVBaseIE._USERTOKEN)

    def _real_extract(self, url):
        """Build the info dict for an episode, slot or live channel URL.

        Metadata is collected from the web page (JSON-LD, breadcrumb, meta
        tags), from the canonical URL and from the API; the formats come from
        an HLS manifest whose location depends on the URL type.
        Raises ExtractorError if a live channel is not listed by the API.
        """
        # starting download using infojson from this extractor is undefined behavior,
        # and never be fixed in the future; you must trigger downloads by directly specifying URL.
        # (unless there's a way to hook before downloading by extractor)
        video_id, video_type = self._match_valid_url(url).group('id', 'type')
        headers = {
            'Authorization': 'Bearer ' + self._get_device_token(),
        }
        # reduce 'video/episode' and 'channels/<name>/slots' to their last component
        video_type = video_type.split('/')[-1]

        webpage = self._download_webpage(url, video_id)
        canonical_url = self._search_regex(
            r'<link\s+rel="canonical"\s*href="(.+?)"', webpage, 'canonical URL',
            default=url)
        info = self._search_json_ld(webpage, video_id, default={})

        title = self._search_regex(
            r'<span\s*class=".+?EpisodeTitleBlock__title">(.+?)</span>', webpage, 'title', default=None)
        if not title:
            # fall back to the caption of the first parsable JSON-LD block next to the thumbnail
            jsonld = None
            for jld in re.finditer(
                    r'(?is)<span\s*class="com-m-Thumbnail__image">(?:</span>)?<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
                    webpage):
                jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
                if jsonld:
                    break
            if jsonld:
                title = jsonld.get('caption')
        if not title and video_type == 'now-on-air':
            if not self._TIMETABLE:
                # cache the timetable because it goes to 5MiB in size (!!)
                self._TIMETABLE = self._download_json(
                    'https://api.abema.io/v1/timetable/dataSet?debug=false', video_id,
                    headers=headers)
            now = time_seconds(hours=9)
            # use the title of the slot that is being broadcast on this channel
            for slot in self._TIMETABLE.get('slots', []):
                if slot.get('channelId') != video_id:
                    continue
                if slot['startAt'] <= now < slot['endAt']:
                    title = slot['title']
                    break

        # read breadcrumb on top of page
        breadcrumb = self._extract_breadcrumb_list(webpage, video_id)
        # A list with fewer than two entries does not have the layout described
        # below (and breadcrumb[-2] would raise IndexError), so it is ignored
        if len(breadcrumb) >= 2:
            # breadcrumb list translates to: (e.g. 1st test for this IE)
            # Home > Anime (genre) > Isekai Shokudo 2 (series name) > Episode 1 "Cheese cakes" "Morning again" (episode title)
            # hence this works
            info['series'] = breadcrumb[-2]
            info['episode'] = breadcrumb[-1]
            if not title:
                title = info['episode']

        description = self._html_search_regex(
            (r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
             r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',),
            webpage, 'description', default=None, group=1)
        if not description:
            og_desc = self._html_search_meta(
                ('description', 'og:description', 'twitter:description'), webpage)
            if og_desc:
                # NOTE(review): the pattern has no trailing `$`, so the lazy group
                # appears to match a single character and the substitution leaves
                # og_desc unchanged - confirm before relying on the suffix removal
                description = re.sub(r'''(?sx)
                    ^(.+?)(?:
                        アニメの動画を無料で見るならABEMA!| # anime
                        等、.+ # applies for most of categories
                    )?
                ''', r'\1', og_desc)

        # canonical URL may contain series and episode number
        mobj = re.search(r's(\d+)_p(\d+)$', canonical_url)
        if mobj:
            seri = int_or_none(mobj.group(1), default=float('inf'))
            epis = int_or_none(mobj.group(2), default=float('inf'))
            info['series_number'] = seri if seri < 100 else None
            # some anime like Detective Conan (though not available in AbemaTV)
            # has more than 1000 episodes (1026 as of 2021/11/15)
            info['episode_number'] = epis if epis < 2000 else None

        is_live, m3u8_url = False, None
        if video_type == 'now-on-air':
            is_live = True
            channel_url = 'https://api.abema.io/v1/channels'
            if video_id == 'news-global':
                channel_url = update_url_query(channel_url, {'division': '1'})
            onair_channels = self._download_json(channel_url, video_id)
            for ch in onair_channels['channels']:
                if video_id == ch['id']:
                    m3u8_url = ch['playback']['hls']
                    break
            else:
                raise ExtractorError(f'Cannot find on-air {video_id} channel.', expected=True)
        elif video_type == 'episode':
            api_response = self._download_json(
                f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
                note='Checking playability',
                headers=headers)
            ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'))
            if 3 not in ondemand_types:
                # cannot acquire decryption key for these streams
                self.report_warning('This is a premium-only stream')
            info.update(traverse_obj(api_response, {
                'series': ('series', 'title'),
                'season': ('season', 'title'),
                'season_number': ('season', 'sequence'),
                'episode_number': ('episode', 'number'),
            }))
            if not title:
                title = traverse_obj(api_response, ('episode', 'title'))
            if not description:
                description = traverse_obj(api_response, ('episode', 'content'))

            m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
        elif video_type == 'slots':
            api_response = self._download_json(
                f'https://api.abema.io/v1/media/slots/{video_id}', video_id,
                note='Checking playability',
                headers=headers)
            if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
                self.report_warning('This is a premium-only stream')

            m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
        else:
            # _VALID_URL only admits the three types handled above
            raise ExtractorError('Unreachable')

        if is_live:
            self.report_warning("This is a livestream; yt-dlp doesn't support downloading natively, but FFmpeg cannot handle m3u8 manifests from AbemaTV")
            self.report_warning('Please consider using Streamlink to download these streams (https://github.com/streamlink/streamlink)')
        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, ext='mp4', live=is_live)

        info.update({
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'is_live': is_live,
        })
        return info
420
421
class AbemaTVTitleIE(AbemaTVBaseIE):
    """Playlist extractor for AbemaTV series ("title") pages."""
    _VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'
    # Number of programs requested per API call
    _PAGE_SIZE = 25

    _TESTS = [{
        'url': 'https://abema.tv/video/title/90-1597',
        'info_dict': {
            'id': '90-1597',
            'title': 'シャッフルアイランド',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://abema.tv/video/title/193-132',
        'info_dict': {
            'id': '193-132',
            'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
        },
        'playlist_mincount': 16,
    }, {
        'url': 'https://abema.tv/video/title/25-102',
        'info_dict': {
            'id': '25-102',
            'title': 'ソードアート・オンライン アリシゼーション',
        },
        'playlist_mincount': 24,
    }]

    def _fetch_page(self, playlist_id, series_version, page):
        """Yield a url_result for every program on the zero-based `page` of the series"""
        page_query = {
            'seriesVersion': series_version,
            'offset': str(page * self._PAGE_SIZE),
            'order': 'seq',
            'limit': str(self._PAGE_SIZE),
        }
        page_data = self._call_api(
            f'v1/video/series/{playlist_id}/programs', playlist_id,
            note=f'Downloading page {page + 1}', query=page_query)
        for program_id in traverse_obj(page_data, ('programs', ..., 'id')):
            yield self.url_result(f'https://abema.tv/video/episode/{program_id}')

    def _entries(self, playlist_id, series_version):
        """Return the paged list of episode entries; pages are fetched by _fetch_page"""
        page_getter = functools.partial(self._fetch_page, playlist_id, series_version)
        return OnDemandPagedList(page_getter, self._PAGE_SIZE)

    def _real_extract(self, url):
        """Return a playlist of all episodes of the series named in `url`"""
        playlist_id = self._match_id(url)
        series_info = self._call_api(f'v1/video/series/{playlist_id}', playlist_id)
        entries = self._entries(playlist_id, series_info['version'])

        return self.playlist_result(
            entries, playlist_id=playlist_id,
            playlist_title=series_info.get('title'),
            playlist_description=series_info.get('content'))