]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/abematv.py
[compat] Remove deprecated functions from core code
[yt-dlp.git] / yt_dlp / extractor / abematv.py
CommitLineData
f8271158 1import base64
2import binascii
3e9b66d7
LNO
3import hashlib
4import hmac
f8271158 5import io
6import json
3e9b66d7
LNO
7import re
8import struct
f8271158 9import time
14f25df2 10import urllib.parse
ac668111 11import urllib.request
f9934b96 12import urllib.response
13import uuid
3e9b66d7 14
3e9b66d7
LNO
15from .common import InfoExtractor
16from ..aes import aes_ecb_decrypt
3e9b66d7
LNO
17from ..utils import (
18 ExtractorError,
f8271158 19 bytes_to_intlist,
7b2c3f47 20 decode_base_n,
3e9b66d7 21 int_or_none,
f8271158 22 intlist_to_bytes,
3e9b66d7
LNO
23 request_to_url,
24 time_seconds,
3e9b66d7 25 traverse_obj,
f8271158 26 update_url_query,
3e9b66d7
LNO
27 urljoin,
28)
29
3e9b66d7
LNO
# NOTE: the network-handler-related code below is temporary until the network stack overhaul PRs are merged (#2861/#2862)
31
e5a998f3 32
def add_opener(ydl, handler):
    """Add a handler used for opening URLs (e.g. by _download_webpage).

    ``ydl._opener`` must be a ``urllib.request.OpenerDirector``; the handler is
    registered on it through ``OpenerDirector.add_handler``.
    """
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
    opener = ydl._opener
    assert isinstance(opener, urllib.request.OpenerDirector)
    opener.add_handler(handler)
3e9b66d7
LNO
39
40
def remove_opener(ydl, handler):
    """
    Remove handler(s) for opening URLs

    @param ydl      Object whose ``_opener`` attribute is the
                    ``urllib.request.OpenerDirector`` to modify.
    @param handler  Either handler object itself or handler type (or a tuple of types).
                    Specifying handler type will remove all handlers for which
                    isinstance returns True.
    """
    # This undoes what OpenerDirector.add_handler does:
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
    opener = ydl._opener
    assert isinstance(opener, urllib.request.OpenerDirector)

    if isinstance(handler, (type, tuple)):
        def is_target(x):
            return isinstance(x, handler)
    else:
        def is_target(x):
            return x is handler

    # add_handler() files a handler under names taken from dir(instance), which
    # includes methods attached per instance (e.g. an alias installed with
    # setattr() in __init__). Such names cannot be rediscovered from a handler
    # *type*, so scrub every dispatch table instead of re-deriving the names.
    tables = [opener.handle_open, opener.process_request, opener.process_response]
    # handle_error is one level deeper: protocol -> {error kind -> [handlers]}
    tables.extend(opener.handle_error.values())
    for table in tables:
        for registered in table.values():
            registered[:] = [x for x in registered if not is_target(x)]

    # Finally detach the handlers from the opener itself
    kept = []
    for x in opener.handlers:
        if is_target(x):
            x.add_parent(None)
        else:
            kept.append(x)
    opener.handlers[:] = kept
98
99
class AbemaLicenseHandler(urllib.request.BaseHandler):
    """urllib handler that serves the video key for 'abematv-license://<ticket>' URLs."""

    handler_order = 499
    STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
    HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'

    def __init__(self, ie: 'AbemaTVIE'):
        self.ie = ie
        # The protocol that this should really handle is 'abematv-license://'.
        # abematv_license_open is just a placeholder for development purposes,
        # so expose it under the hyphenated name as well.
        # ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
        setattr(self, 'abematv-license_open', self.abematv_license_open)

    def _get_videokey_from_ticket(self, ticket):
        """Request a playback license for *ticket* and return the decrypted video key as bytes."""
        verbose = self.ie.get_param('verbose', False)
        media_token = self.ie._get_media_token(to_show=verbose)

        payload = json.dumps({
            'kv': 'a',
            'lt': ticket
        }).encode('utf-8')
        license_response = self.ie._download_json(
            'https://license.abema.io/abematv-hls', None,
            note='Requesting playback license' if verbose else False,
            query={'t': media_token},
            data=payload,
            headers={
                'Content-Type': 'application/json',
            })

        # 'k' is decoded with STRTABLE into an integer, which is then laid out
        # as two big-endian unsigned 64-bit words (16 bytes in total)
        key_number = decode_base_n(license_response['k'], table=self.STRTABLE)
        packed_key = struct.pack('>QQ', key_number >> 64, key_number & 0xffffffffffffffff)
        encrypted_key = bytes_to_intlist(packed_key)

        # The AES key is HMAC-SHA256(HKEY, cid + device id)
        hmac_key = binascii.unhexlify(self.HKEY)
        hmac_message = (license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8')
        digest = hmac.new(hmac_key, hmac_message, digestmod=hashlib.sha256).digest()
        aes_key = bytes_to_intlist(digest)

        return intlist_to_bytes(aes_ecb_decrypt(encrypted_key, aes_key))

    def abematv_license_open(self, url):
        """Answer a license request; the ticket is the netloc part of the requested URL."""
        url = request_to_url(url)
        ticket = urllib.parse.urlparse(url).netloc
        video_key = self._get_videokey_from_ticket(ticket)
        response_headers = {
            'Content-Length': len(video_key),
        }
        return urllib.response.addinfourl(
            io.BytesIO(video_key), headers=response_headers, url=url, code=200)
145
146
class AbemaTVBaseIE(InfoExtractor):
    """Helpers shared by the AbemaTV extractors."""

    def _extract_breadcrumb_list(self, webpage, video_id):
        """Return the item names of the first non-empty BreadcrumbList JSON-LD in *webpage*.

        Returns an empty list when no such JSON-LD block is found.
        """
        candidates = re.finditer(
            r'(?is)</span></li></ul><script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
            webpage)
        for candidate in candidates:
            data = self._parse_json(candidate.group('json_ld'), video_id, fatal=False)
            # skip blocks that failed to parse, are empty, or are another JSON-LD type
            if not data or data.get('@type') != 'BreadcrumbList':
                continue
            names = traverse_obj(data, ('itemListElement', ..., 'name'))
            if names:
                return names
        return []
160
161
class AbemaTVIE(AbemaTVBaseIE):
    """Extractor for AbemaTV episodes, time-shifted slots and on-air channels."""

    _VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
    _NETRC_MACHINE = 'abematv'
    _TESTS = [{
        'url': 'https://abema.tv/video/episode/194-25_s2_p1',
        'info_dict': {
            'id': '194-25_s2_p1',
            'title': '第1話 「チーズケーキ」 「モーニング再び」',
            'series': '異世界食堂2',
            'series_number': 2,
            'episode': '第1話 「チーズケーキ」 「モーニング再び」',
            'episode_number': 1,
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/channels/anime-live2/slots/E8tvAnMJ7a9a5d',
        'info_dict': {
            'id': 'E8tvAnMJ7a9a5d',
            'title': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
            'series': 'ゆるキャン△ SEASON2',
            'episode': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
            'series_number': 2,
            'episode_number': 1,
            'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/video/episode/87-877_s1282_p31047',
        'info_dict': {
            # the extractor reports the ID taken from the URL
            'id': '87-877_s1282_p31047',
            'title': '第5話『光射す』',
            'description': 'md5:56d4fc1b4f7769ded5f923c55bb4695d',
            'thumbnail': r're:https://hayabusa\.io/.+',
            'series': '相棒',
            'episode': '第5話『光射す』',
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/now-on-air/abema-anime',
        'info_dict': {
            'id': 'abema-anime',
            # this varies
            # 'title': '女子高生の無駄づかい 全話一挙【無料ビデオ72時間】',
            'description': 'md5:55f2e61f46a17e9230802d7bcc913d5f',
            'is_live': True,
        },
        'skip': 'Not supported until yt-dlp implements native live downloader OR AbemaTV can start a local HTTP server',
    }]
    # Lazily filled caches (see _get_device_token, _get_media_token and _real_extract)
    _USERTOKEN = None
    _DEVICE_ID = None
    _TIMETABLE = None
    _MEDIATOKEN = None

    # HMAC key used by _generate_aks
    _SECRETKEY = b'v+Gjs=25Aw5erR!J8ZuvRrCx*rGswhB&qdHd_SYerEWdU&a?3DzN9BRbp5KwY4hEmcj5#fykMjJ=AuWz5GSMY-d@H7DMEh3M@9n2G552Us$$k9cD=3TxwWe86!x#Zyhe'

    def _generate_aks(self, deviceid):
        """Compute the 'applicationKeySecret' value sent when registering *deviceid*.

        The result is an unpadded URL-safe base64 string derived from repeated
        HMAC-SHA256 rounds over the secret key, the device ID and the next full hour.
        """
        deviceid = deviceid.encode('utf-8')
        # add 1 hour and then drop minute and secs
        ts_1hour = int((time_seconds(hours=9) // 3600 + 1) * 3600)
        time_struct = time.gmtime(ts_1hour)
        ts_1hour_str = str(ts_1hour).encode('utf-8')

        # running digest shared by the mixing helpers below
        tmp = None

        def mix_once(nonce):
            # tmp = HMAC-SHA256(_SECRETKEY, nonce)
            nonlocal tmp
            h = hmac.new(self._SECRETKEY, digestmod=hashlib.sha256)
            h.update(nonce)
            tmp = h.digest()

        def mix_tmp(count):
            # feed the digest back into itself `count` times
            for _ in range(count):
                mix_once(tmp)

        def mix_twist(nonce):
            # mix the base64 form of the digest together with extra data
            mix_once(base64.urlsafe_b64encode(tmp).rstrip(b'=') + nonce)

        mix_once(self._SECRETKEY)
        mix_tmp(time_struct.tm_mon)
        mix_twist(deviceid)
        mix_tmp(time_struct.tm_mday % 5)
        mix_twist(ts_1hour_str)
        mix_tmp(time_struct.tm_hour % 5)

        return base64.urlsafe_b64encode(tmp).rstrip(b'=').decode('utf-8')

    def _get_device_token(self):
        """Return the user token, registering a fresh random device on first use.

        Registering also (re)installs AbemaLicenseHandler on the downloader's opener.
        """
        if self._USERTOKEN:
            return self._USERTOKEN

        self._DEVICE_ID = str(uuid.uuid4())
        aks = self._generate_aks(self._DEVICE_ID)
        user_data = self._download_json(
            'https://api.abema.io/v1/users', None, note='Authorizing',
            data=json.dumps({
                'deviceId': self._DEVICE_ID,
                'applicationKeySecret': aks,
            }).encode('utf-8'),
            headers={
                'Content-Type': 'application/json',
            })
        self._USERTOKEN = user_data['token']

        # don't allow adding it 2 times or more, though it's guarded
        remove_opener(self._downloader, AbemaLicenseHandler)
        add_opener(self._downloader, AbemaLicenseHandler(self))

        return self._USERTOKEN

    def _get_media_token(self, invalidate=False, to_show=True):
        """Return the media token, fetching it when missing or when *invalidate* is set.

        @param invalidate  Ignore the cached token and request a new one.
        @param to_show     Whether to print the download note for the request.
        """
        if not invalidate and self._MEDIATOKEN:
            return self._MEDIATOKEN

        self._MEDIATOKEN = self._download_json(
            'https://api.abema.io/v1/media/token', None, note='Fetching media token' if to_show else False,
            query={
                'osName': 'android',
                'osVersion': '6.0.1',
                'osLang': 'ja_JP',
                'osTimezone': 'Asia/Tokyo',
                'appId': 'tv.abema',
                'appVersion': '3.27.1'
            }, headers={
                'Authorization': 'bearer ' + self._get_device_token()
            })['token']

        return self._MEDIATOKEN

    def _perform_login(self, username, password):
        """Log in with an e-mail address or a user ID and refresh the cached tokens."""
        if '@' in username:  # don't strictly check if it's email address or not
            ep, method = 'user/email', 'email'
        else:
            ep, method = 'oneTimePassword', 'userId'

        login_response = self._download_json(
            f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
            data=json.dumps({
                method: username,
                'password': password
            }).encode('utf-8'), headers={
                'Authorization': 'bearer ' + self._get_device_token(),
                'Origin': 'https://abema.tv',
                'Referer': 'https://abema.tv/',
                'Content-Type': 'application/json',
            })

        self._USERTOKEN = login_response['token']
        # the old media token belongs to the anonymous user; fetch a new one
        self._get_media_token(True)

    def _real_extract(self, url):
        """Extract metadata and HLS formats for an episode, a slot or an on-air channel."""
        # starting download using infojson from this extractor is undefined behavior,
        # and never be fixed in the future; you must trigger downloads by directly specifying URL.
        # (unless there's a way to hook before downloading by extractor)
        video_id, video_type = self._match_valid_url(url).group('id', 'type')
        headers = {
            'Authorization': 'Bearer ' + self._get_device_token(),
        }
        # 'video/episode' -> 'episode', 'channels/<name>/slots' -> 'slots'
        video_type = video_type.split('/')[-1]

        webpage = self._download_webpage(url, video_id)
        canonical_url = self._search_regex(
            r'<link\s+rel="canonical"\s*href="(.+?)"', webpage, 'canonical URL',
            default=url)
        info = self._search_json_ld(webpage, video_id, default={})

        title = self._search_regex(
            r'<span\s*class=".+?EpisodeTitleBlock__title">(.+?)</span>', webpage, 'title', default=None)
        if not title:
            # fall back to the caption of the first parsable thumbnail JSON-LD
            jsonld = None
            for jld in re.finditer(
                    r'(?is)<span\s*class="com-m-Thumbnail__image">(?:</span>)?<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
                    webpage):
                jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
                if jsonld:
                    break
            if jsonld:
                title = jsonld.get('caption')
        if not title and video_type == 'now-on-air':
            if not self._TIMETABLE:
                # cache the timetable because it goes to 5MiB in size (!!)
                self._TIMETABLE = self._download_json(
                    'https://api.abema.io/v1/timetable/dataSet?debug=false', video_id,
                    headers=headers)
            now = time_seconds(hours=9)
            for slot in self._TIMETABLE.get('slots', []):
                if slot.get('channelId') != video_id:
                    continue
                if slot['startAt'] <= now and now < slot['endAt']:
                    title = slot['title']
                    break

        # read breadcrumb on top of page
        breadcrumb = self._extract_breadcrumb_list(webpage, video_id)
        # both the series and the episode entry are needed; a shorter list does not
        # have the layout described below and used to raise IndexError
        if len(breadcrumb) >= 2:
            # breadcrumb list translates to: (example is 1st test for this IE)
            # Home > Anime (genre) > Isekai Shokudo 2 (series name) > Episode 1 "Cheese cakes" "Morning again" (episode title)
            # hence this works
            info['series'] = breadcrumb[-2]
            info['episode'] = breadcrumb[-1]
            if not title:
                title = info['episode']

        description = self._html_search_regex(
            (r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
             r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',),
            webpage, 'description', default=None, group=1)
        if not description:
            og_desc = self._html_search_meta(
                ('description', 'og:description', 'twitter:description'), webpage)
            if og_desc:
                # NOTE(review): the pattern has no trailing `$`, so the lazy `(.+?)` can stop
                # after one character and the optional suffix group is then skipped, which
                # leaves og_desc unchanged in most cases - confirm whether stripping the
                # boilerplate suffix was intended before anchoring it.
                description = re.sub(r'''(?sx)
                    ^(.+?)(?:
                        アニメの動画を無料で見るならABEMA!| # anime
                        等、.+ # applies for most of categories
                    )?
                ''', r'\1', og_desc)

        # canonical URL may contain series and episode number
        mobj = re.search(r's(\d+)_p(\d+)$', canonical_url)
        if mobj:
            seri = int_or_none(mobj.group(1), default=float('inf'))
            epis = int_or_none(mobj.group(2), default=float('inf'))
            info['series_number'] = seri if seri < 100 else None
            # some anime like Detective Conan (though not available in AbemaTV)
            # has more than 1000 episodes (1026 as of 2021/11/15)
            info['episode_number'] = epis if epis < 2000 else None

        is_live, m3u8_url = False, None
        if video_type == 'now-on-air':
            is_live = True
            channel_url = 'https://api.abema.io/v1/channels'
            if video_id == 'news-global':
                channel_url = update_url_query(channel_url, {'division': '1'})
            onair_channels = self._download_json(channel_url, video_id)
            for ch in onair_channels['channels']:
                if video_id == ch['id']:
                    m3u8_url = ch['playback']['hls']
                    break
            else:
                raise ExtractorError(f'Cannot find on-air {video_id} channel.', expected=True)
        elif video_type == 'episode':
            api_response = self._download_json(
                f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
                note='Checking playability',
                headers=headers)
            ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'), default=[])
            if 3 not in ondemand_types:
                # cannot acquire decryption key for these streams
                self.report_warning('This is a premium-only stream')

            m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
        elif video_type == 'slots':
            api_response = self._download_json(
                f'https://api.abema.io/v1/media/slots/{video_id}', video_id,
                note='Checking playability',
                headers=headers)
            if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
                self.report_warning('This is a premium-only stream')

            m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
        else:
            raise ExtractorError('Unreachable')

        if is_live:
            self.report_warning("This is a livestream; yt-dlp doesn't support downloading natively, but FFmpeg cannot handle m3u8 manifests from AbemaTV")
            self.report_warning('Please consider using Streamlink to download these streams (https://github.com/streamlink/streamlink)')
        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, ext='mp4', live=is_live)

        info.update({
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'is_live': is_live,
        })
        return info
441
442
class AbemaTVTitleIE(AbemaTVBaseIE):
    """Playlist extractor for AbemaTV title (series) pages."""

    _VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'

    _TESTS = [{
        'url': 'https://abema.tv/video/title/90-1597',
        'info_dict': {
            'id': '90-1597',
            'title': 'シャッフルアイランド',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://abema.tv/video/title/193-132',
        'info_dict': {
            'id': '193-132',
            'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
        },
        'playlist_mincount': 16,
    }]

    def _real_extract(self, url):
        """Build a playlist from the episode links listed on the title page."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # the last breadcrumb entry is used as the playlist title, when there is one
        breadcrumb = self._extract_breadcrumb_list(webpage, video_id)
        playlist_title = breadcrumb[-1] if breadcrumb else None

        entries = []
        for link in re.finditer(r'<li\s*class=".+?EpisodeList.+?"><a\s*href="(/[^"]+?)"', webpage):
            episode_url = urljoin('https://abema.tv/', link.group(1))
            entries.append(self.url_result(episode_url))

        return self.playlist_result(entries, playlist_title=playlist_title, playlist_id=video_id)