]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/abematv.py
[docs] Consistent use of `e.g.` (#4643)
[yt-dlp.git] / yt_dlp / extractor / abematv.py
CommitLineData
f8271158 1import base64
2import binascii
bc83b4b0 3import functools
3e9b66d7
LNO
4import hashlib
5import hmac
f8271158 6import io
7import json
3e9b66d7
LNO
8import re
9import struct
f8271158 10import time
14f25df2 11import urllib.parse
ac668111 12import urllib.request
f9934b96 13import urllib.response
14import uuid
3e9b66d7 15
3e9b66d7
LNO
16from .common import InfoExtractor
17from ..aes import aes_ecb_decrypt
3e9b66d7
LNO
18from ..utils import (
19 ExtractorError,
f8271158 20 bytes_to_intlist,
7b2c3f47 21 decode_base_n,
3e9b66d7 22 int_or_none,
f8271158 23 intlist_to_bytes,
bc83b4b0 24 OnDemandPagedList,
3e9b66d7
LNO
25 request_to_url,
26 time_seconds,
3e9b66d7 27 traverse_obj,
f8271158 28 update_url_query,
3e9b66d7
LNO
29)
30
3e9b66d7
LNO
# NOTE: the network handler related code is a temporary measure until the network stack overhaul PRs are merged (#2861/#2862)
32
e5a998f3 33
def add_opener(ydl, handler):
    ''' Register a handler on ydl's URL opener (the one used by e.g. _download_webpage) '''
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
    opener = ydl._opener
    assert isinstance(opener, urllib.request.OpenerDirector)
    opener.add_handler(handler)
3e9b66d7
LNO
40
41
def remove_opener(ydl, handler):
    '''
    Remove handler(s) for opening URLs

    @param ydl      Object whose `_opener` attribute is a urllib.request.OpenerDirector.
    @param handler  Either the handler object itself, a handler type, or a tuple of handler types.
                    Specifying type(s) removes every handler for which isinstance returns True.
    '''
    # This undoes the bookkeeping performed by OpenerDirector.add_handler:
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
    opener = ydl._opener
    assert isinstance(opener, urllib.request.OpenerDirector)

    if isinstance(handler, (type, tuple)):
        def find_cp(x):
            return isinstance(x, handler)
    else:
        def find_cp(x):
            return x is handler

    def purge(table):
        # handle_error maps protocol -> {kind -> [handlers]}, the other tables map
        # kind -> [handlers]; recurse through nested dicts and filter lists in place
        # so that references held by the opener stay valid.
        for chain in table.values():
            if isinstance(chain, dict):
                purge(chain)
            elif any(find_cp(x) for x in chain):
                chain[:] = [x for x in chain if not find_cp(x)]

    # Scanning the opener's own tables (instead of guessing method names from
    # dir(handler)) also works for tuples of types and for subclass instances,
    # and does not leave empty entries behind.
    for table in (opener.handle_error, opener.handle_open,
                  opener.process_response, opener.process_request):
        purge(table)

    # Detach the removed handlers from the opener before dropping them
    for x in opener.handlers:
        if find_cp(x):
            x.add_parent(None)
    opener.handlers[:] = [x for x in opener.handlers if not find_cp(x)]
99
100
class AbemaLicenseHandler(urllib.request.BaseHandler):
    # urllib handler that answers license URLs with the decrypted video key
    handler_order = 499
    STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
    HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'

    def __init__(self, ie: 'AbemaTVIE'):
        # the protocol that this should really handle is 'abematv-license://'
        # abematv_license_open is just a placeholder for development purposes
        # ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
        # A hyphen is not valid in a method name, hence the alias is attached at runtime.
        setattr(self, 'abematv-license_open', self.abematv_license_open)
        self.ie = ie

    def _get_videokey_from_ticket(self, ticket):
        ''' Exchange a license ticket for the raw (decrypted) video key bytes '''
        verbose = self.ie.get_param('verbose', False)
        media_token = self.ie._get_media_token(to_show=verbose)

        payload = json.dumps({
            'kv': 'a',
            'lt': ticket,
        }).encode('utf-8')
        license_response = self.ie._download_json(
            'https://license.abema.io/abematv-hls', None,
            note='Requesting playback license' if verbose else False,
            query={'t': media_token},
            data=payload,
            headers={'Content-Type': 'application/json'})

        # 'k' decodes to one big integer; repack it as two big-endian 64-bit halves
        packed = decode_base_n(license_response['k'], table=self.STRTABLE)
        upper, lower = packed >> 64, packed & 0xffffffffffffffff
        encvideokey = bytes_to_intlist(struct.pack('>QQ', upper, lower))

        # the AES key is an HMAC-SHA256 over content id + device id
        message = (license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8')
        mac = hmac.new(binascii.unhexlify(self.HKEY), message, digestmod=hashlib.sha256)
        enckey = bytes_to_intlist(mac.digest())

        return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))

    def abematv_license_open(self, url):
        ''' Serve the video key for the ticket stored in the netloc of the requested URL '''
        url = request_to_url(url)
        ticket = urllib.parse.urlparse(url).netloc
        key = self._get_videokey_from_ticket(ticket)
        response_headers = {
            'Content-Length': len(key),
        }
        return urllib.response.addinfourl(
            io.BytesIO(key), headers=response_headers, url=url, code=200)
146
147
class AbemaTVBaseIE(InfoExtractor):
    # Tokens are written to AbemaTVBaseIE itself (not to instances),
    # so they are visible from every extractor derived from this class.
    _USERTOKEN = None
    _DEVICE_ID = None
    _MEDIATOKEN = None

    _SECRETKEY = b'v+Gjs=25Aw5erR!J8ZuvRrCx*rGswhB&qdHd_SYerEWdU&a?3DzN9BRbp5KwY4hEmcj5#fykMjJ=AuWz5GSMY-d@H7DMEh3M@9n2G552Us$$k9cD=3TxwWe86!x#Zyhe'

    @classmethod
    def _generate_aks(cls, deviceid):
        ''' Derive the applicationKeySecret for deviceid from the secret key and the upcoming full hour '''
        deviceid = deviceid.encode('utf-8')
        # add 1 hour and then drop minute and secs
        ts_1hour = int((time_seconds(hours=9) // 3600 + 1) * 3600)
        time_struct = time.gmtime(ts_1hour)
        ts_1hour_str = str(ts_1hour).encode('utf-8')

        def digest(message):
            # one HMAC-SHA256 round keyed with the secret key
            return hmac.new(cls._SECRETKEY, message, digestmod=hashlib.sha256).digest()

        def stir(state, rounds):
            # feed the state back into the HMAC `rounds` times
            for _ in range(rounds):
                state = digest(state)
            return state

        def twist(state, salt):
            # unpadded urlsafe base64 of the state, followed by the salt
            return digest(base64.urlsafe_b64encode(state).rstrip(b'=') + salt)

        state = digest(cls._SECRETKEY)
        state = stir(state, time_struct.tm_mon)
        state = twist(state, deviceid)
        state = stir(state, time_struct.tm_mday % 5)
        state = twist(state, ts_1hour_str)
        state = stir(state, time_struct.tm_hour % 5)

        return base64.urlsafe_b64encode(state).rstrip(b'=').decode('utf-8')

    def _get_device_token(self):
        ''' Return the user token, registering a fresh device id first if there is none yet '''
        if self._USERTOKEN:
            return self._USERTOKEN

        AbemaTVBaseIE._DEVICE_ID = str(uuid.uuid4())
        aks = self._generate_aks(self._DEVICE_ID)
        registration = json.dumps({
            'deviceId': self._DEVICE_ID,
            'applicationKeySecret': aks,
        }).encode('utf-8')
        user_data = self._download_json(
            'https://api.abema.io/v1/users', None, note='Authorizing',
            data=registration,
            headers={'Content-Type': 'application/json'})
        AbemaTVBaseIE._USERTOKEN = user_data['token']

        # don't allow adding it 2 times or more, though it's guarded
        remove_opener(self._downloader, AbemaLicenseHandler)
        add_opener(self._downloader, AbemaLicenseHandler(self))

        return self._USERTOKEN

    def _get_media_token(self, invalidate=False, to_show=True):
        ''' Return the media token; it is fetched when missing or when invalidate is set '''
        if self._MEDIATOKEN and not invalidate:
            return self._MEDIATOKEN

        client_info = {
            'osName': 'android',
            'osVersion': '6.0.1',
            'osLang': 'ja_JP',
            'osTimezone': 'Asia/Tokyo',
            'appId': 'tv.abema',
            'appVersion': '3.27.1'
        }
        token_response = self._download_json(
            'https://api.abema.io/v1/media/token', None,
            note='Fetching media token' if to_show else False,
            query=client_info,
            headers={
                'Authorization': f'bearer {self._get_device_token()}',
            })
        AbemaTVBaseIE._MEDIATOKEN = token_response['token']

        return self._MEDIATOKEN

    def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'):
        ''' Download JSON from an api.abema.io endpoint, authorized with the device token '''
        api_url = f'https://api.abema.io/{endpoint}'
        auth_headers = {
            'Authorization': f'bearer {self._get_device_token()}',
        }
        return self._download_json(
            api_url, video_id, query=query or {}, note=note, headers=auth_headers)

    def _extract_breadcrumb_list(self, webpage, video_id):
        ''' Return the item names of the first non-empty BreadcrumbList JSON-LD found, or [] '''
        jsonld_re = r'(?is)</span></li></ul><script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
        for match in re.finditer(jsonld_re, webpage):
            jsonld = self._parse_json(match.group('json_ld'), video_id, fatal=False)
            if traverse_obj(jsonld, '@type') == 'BreadcrumbList':
                names = traverse_obj(jsonld, ('itemListElement', ..., 'name'))
                if names:
                    return names
        return []
250
251
class AbemaTVIE(AbemaTVBaseIE):
    _VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
    _NETRC_MACHINE = 'abematv'
    _TESTS = [{
        'url': 'https://abema.tv/video/episode/194-25_s2_p1',
        'info_dict': {
            'id': '194-25_s2_p1',
            'title': '第1話 「チーズケーキ」 「モーニング再び」',
            'series': '異世界食堂2',
            'series_number': 2,
            'episode': '第1話 「チーズケーキ」 「モーニング再び」',
            'episode_number': 1,
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/channels/anime-live2/slots/E8tvAnMJ7a9a5d',
        'info_dict': {
            'id': 'E8tvAnMJ7a9a5d',
            'title': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
            'series': 'ゆるキャン△ SEASON2',
            'episode': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
            'series_number': 2,
            'episode_number': 1,
            'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/video/episode/87-877_s1282_p31047',
        'info_dict': {
            'id': 'E8tvAnMJ7a9a5d',
            'title': '第5話『光射す』',
            'description': 'md5:56d4fc1b4f7769ded5f923c55bb4695d',
            'thumbnail': r're:https://hayabusa\.io/.+',
            'series': '相棒',
            'episode': '第5話『光射す』',
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/now-on-air/abema-anime',
        'info_dict': {
            'id': 'abema-anime',
            # this varies
            # 'title': '女子高生の無駄づかい 全話一挙【無料ビデオ72時間】',
            'description': 'md5:55f2e61f46a17e9230802d7bcc913d5f',
            'is_live': True,
        },
        'skip': 'Not supported until yt-dlp implements native live downloader OR AbemaTV can start a local HTTP server',
    }]
    _TIMETABLE = None

    def _perform_login(self, username, password):
        ''' Log in by e-mail (when username contains "@") or by user id, then refresh the media token '''
        # don't strictly check if it's email address or not
        ep, method = ('user/email', 'email') if '@' in username else ('oneTimePassword', 'userId')

        credentials = json.dumps({
            method: username,
            'password': password
        }).encode('utf-8')
        login_response = self._download_json(
            f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
            data=credentials,
            headers={
                'Authorization': f'bearer {self._get_device_token()}',
                'Origin': 'https://abema.tv',
                'Referer': 'https://abema.tv/',
                'Content-Type': 'application/json',
            })

        AbemaTVBaseIE._USERTOKEN = login_response['token']
        # the cached media token was issued for the previous user token
        self._get_media_token(True)

    def _real_extract(self, url):
        # starting download using infojson from this extractor is undefined behavior,
        # and never be fixed in the future; you must trigger downloads by directly specifying URL.
        # (unless there's a way to hook before downloading by extractor)
        video_id, video_type = self._match_valid_url(url).group('id', 'type')
        headers = {'Authorization': 'Bearer ' + self._get_device_token()}
        # only the last path component tells the kinds apart: now-on-air / episode / slots
        video_type = video_type.split('/')[-1]

        webpage = self._download_webpage(url, video_id)
        canonical_url = self._search_regex(
            r'<link\s+rel="canonical"\s*href="(.+?)"', webpage, 'canonical URL',
            default=url)
        info = self._search_json_ld(webpage, video_id, default={})

        title = self._search_regex(
            r'<span\s*class=".+?EpisodeTitleBlock__title">(.+?)</span>', webpage, 'title', default=None)
        if not title:
            # fall back to the caption of the first parsable JSON-LD next to the thumbnail
            thumbnail_jsonld_re = r'(?is)<span\s*class="com-m-Thumbnail__image">(?:</span>)?<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
            parsed_candidates = (
                self._parse_json(match.group('json_ld'), video_id, fatal=False)
                for match in re.finditer(thumbnail_jsonld_re, webpage))
            jsonld = next(filter(None, parsed_candidates), None)
            if jsonld:
                title = jsonld.get('caption')
        if video_type == 'now-on-air' and not title:
            if not self._TIMETABLE:
                # cache the timetable because it goes to 5MiB in size (!!)
                self._TIMETABLE = self._download_json(
                    'https://api.abema.io/v1/timetable/dataSet?debug=false', video_id,
                    headers=headers)
            now = time_seconds(hours=9)
            for slot in self._TIMETABLE.get('slots', []):
                if slot.get('channelId') == video_id and slot['startAt'] <= now < slot['endAt']:
                    title = slot['title']
                    break

        # read breadcrumb on top of page
        breadcrumb = self._extract_breadcrumb_list(webpage, video_id)
        if breadcrumb:
            # breadcrumb list translates to: (e.g. 1st test for this IE)
            # Home > Anime (genre) > Isekai Shokudo 2 (series name) > Episode 1 "Cheese cakes" "Morning again" (episode title)
            # hence this works
            info['series'] = breadcrumb[-2]
            info['episode'] = breadcrumb[-1]
            if not title:
                title = info['episode']

        description_patterns = (
            r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
            r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',
        )
        description = self._html_search_regex(
            description_patterns, webpage, 'description', default=None, group=1)
        if not description:
            og_desc = self._html_search_meta(
                ('description', 'og:description', 'twitter:description'), webpage)
            if og_desc:
                # strip the promotional tail appended to the meta description
                description = re.sub(r'''(?sx)
                    ^(.+?)(?:
                        アニメの動画を無料で見るならABEMA!| # anime
                        等、.+ # applies for most of categories
                    )?
                ''', r'\1', og_desc)

        # canonical URL may contain series and episode number
        mobj = re.search(r's(\d+)_p(\d+)$', canonical_url)
        if mobj:
            seri, epis = (int_or_none(num, default=float('inf')) for num in mobj.groups())
            info['series_number'] = seri if seri < 100 else None
            # some anime like Detective Conan (though not available in AbemaTV)
            # has more than 1000 episodes (1026 as of 2021/11/15)
            info['episode_number'] = epis if epis < 2000 else None

        is_live, m3u8_url = False, None
        if video_type == 'now-on-air':
            is_live = True
            channel_url = 'https://api.abema.io/v1/channels'
            if video_id == 'news-global':
                channel_url = update_url_query(channel_url, {'division': '1'})
            onair_channels = self._download_json(channel_url, video_id)
            for channel in onair_channels['channels']:
                if channel['id'] == video_id:
                    m3u8_url = channel['playback']['hls']
                    break
            else:
                raise ExtractorError(f'Cannot find on-air {video_id} channel.', expected=True)
        elif video_type == 'episode':
            api_response = self._download_json(
                f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
                note='Checking playability',
                headers=headers)
            ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'), default=[])
            if 3 not in ondemand_types:
                # cannot acquire decryption key for these streams
                self.report_warning('This is a premium-only stream')

            m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
        elif video_type == 'slots':
            api_response = self._download_json(
                f'https://api.abema.io/v1/media/slots/{video_id}', video_id,
                note='Checking playability',
                headers=headers)
            timeshift_free = traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False)
            if not timeshift_free:
                self.report_warning('This is a premium-only stream')

            m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
        else:
            raise ExtractorError('Unreachable')

        if is_live:
            self.report_warning("This is a livestream; yt-dlp doesn't support downloading natively, but FFmpeg cannot handle m3u8 manifests from AbemaTV")
            self.report_warning('Please consider using Streamlink to download these streams (https://github.com/streamlink/streamlink)')
        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, ext='mp4', live=is_live)

        info.update({
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'is_live': is_live,
        })
        return info
451
452
class AbemaTVTitleIE(AbemaTVBaseIE):
    _VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'
    _PAGE_SIZE = 25

    _TESTS = [{
        'url': 'https://abema.tv/video/title/90-1597',
        'info_dict': {
            'id': '90-1597',
            'title': 'シャッフルアイランド',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://abema.tv/video/title/193-132',
        'info_dict': {
            'id': '193-132',
            'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
        },
        'playlist_mincount': 16,
    }, {
        'url': 'https://abema.tv/video/title/25-102',
        'info_dict': {
            'id': '25-102',
            'title': 'ソードアート・オンライン アリシゼーション',
        },
        'playlist_mincount': 24,
    }]

    def _fetch_page(self, playlist_id, series_version, page):
        ''' Yield url results for the episodes on the given (0-based) page of the series '''
        page_query = {
            'seriesVersion': series_version,
            'offset': str(page * self._PAGE_SIZE),
            'order': 'seq',
            'limit': str(self._PAGE_SIZE),
        }
        programs = self._call_api(
            f'v1/video/series/{playlist_id}/programs', playlist_id,
            note=f'Downloading page {page + 1}', query=page_query)
        for episode_id in traverse_obj(programs, ('programs', ..., 'id'), default=[]):
            yield self.url_result(f'https://abema.tv/video/episode/{episode_id}')

    def _entries(self, playlist_id, series_version):
        ''' Build the paged list of episodes; pages are requested through _fetch_page '''
        fetch_page = functools.partial(self._fetch_page, playlist_id, series_version)
        return OnDemandPagedList(fetch_page, self._PAGE_SIZE)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        series_info = self._call_api(f'v1/video/series/{playlist_id}', playlist_id)

        entries = self._entries(playlist_id, series_info['version'])
        return self.playlist_result(
            entries, playlist_id=playlist_id,
            playlist_title=series_info.get('title'),
            playlist_description=series_info.get('content'))