8 from base64
import urlsafe_b64encode
9 from binascii
import unhexlify
11 from .common
import InfoExtractor
12 from ..aes
import aes_ecb_decrypt
13 from ..compat
import (
14 compat_urllib_response
,
15 compat_urllib_parse_urlparse
,
16 compat_urllib_request
,
# NOTE: the network-handler-related code below is a temporary measure until the network stack overhaul PRs are merged (#2861/#2862)
def add_opener(ydl, handler):
    '''
    Add a handler for opening URLs, like _download_webpage

    @param ydl      Object whose `_opener` attribute is the OpenerDirector to extend.
    @param handler  Handler instance to register on that opener.
    Raises AssertionError if `ydl._opener` is not an OpenerDirector.
    '''
    # Registration is delegated to OpenerDirector.add_handler; see
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
    assert isinstance(ydl._opener, compat_urllib_request.OpenerDirector)
    ydl._opener.add_handler(handler)
43 def remove_opener(ydl
, handler
):
45 Remove handler(s) for opening URLs
46 @param handler Either handler object itself or handler type.
47 Specifying handler type will remove all handler which isinstance returns True.
49 # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
50 # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
52 assert isinstance(ydl
._opener
, compat_urllib_request
.OpenerDirector
)
53 if isinstance(handler
, (type, tuple)):
54 find_cp
= lambda x
: isinstance(x
, handler
)
56 find_cp
= lambda x
: x
is handler
59 for meth
in dir(handler
):
60 if meth
in ["redirect_request", "do_open", "proxy_open"]:
61 # oops, coincidental match
66 condition
= meth
[i
+ 1:]
68 if condition
.startswith("error"):
69 j
= condition
.find("_") + i
+ 1
75 lookup
= opener
.handle_error
.get(protocol
, {})
76 opener
.handle_error
[protocol
] = lookup
77 elif condition
== "open":
79 lookup
= opener
.handle_open
80 elif condition
== "response":
82 lookup
= opener
.process_response
83 elif condition
== "request":
85 lookup
= opener
.process_request
89 handlers
= lookup
.setdefault(kind
, [])
91 handlers
[:] = [x
for x
in handlers
if not find_cp(x
)]
93 removed
.append(x
for x
in handlers
if find_cp(x
))
96 for x
in opener
.handlers
:
99 opener
.handlers
[:] = [x
for x
in opener
.handlers
if not find_cp(x
)]
102 class AbemaLicenseHandler(compat_urllib_request
.BaseHandler
):
104 STRTABLE
= '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
105 HKEY
= b
'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
107 def __init__(self
, ie
: 'AbemaTVIE'):
# the protocol that this should really handle is 'abematv-license://'
109 # abematv_license_open is just a placeholder for development purposes
110 # ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
111 setattr(self
, 'abematv-license_open', getattr(self
, 'abematv_license_open'))
114 def _get_videokey_from_ticket(self
, ticket
):
115 to_show
= self
.ie
._downloader
.params
.get('verbose', False)
116 media_token
= self
.ie
._get
_media
_token
(to_show
=to_show
)
118 license_response
= self
.ie
._download
_json
(
119 'https://license.abema.io/abematv-hls', None, note
='Requesting playback license' if to_show
else False,
120 query
={'t': media_token}
,
126 'Content-Type': 'application/json',
129 res
= decode_base(license_response
['k'], self
.STRTABLE
)
130 encvideokey
= bytes_to_intlist(struct
.pack('>QQ', res
>> 64, res
& 0xffffffffffffffff))
133 unhexlify(self
.HKEY
),
134 (license_response
['cid'] + self
.ie
._DEVICE
_ID
).encode('utf-8'),
135 digestmod
=hashlib
.sha256
)
136 enckey
= bytes_to_intlist(h
.digest())
138 return intlist_to_bytes(aes_ecb_decrypt(encvideokey
, enckey
))
def abematv_license_open(self, url):
    '''
    Answer a license request with the video key for the ticket in its URL.

    @param url  Whatever request_to_url accepts; it is normalised through that
                helper before being parsed.
    @returns    An addinfourl response with code 200 whose body is the bytes
                returned by _get_videokey_from_ticket.
    '''
    url = request_to_url(url)
    # The ticket is read from the network-location (host) component of the URL.
    ticket = compat_urllib_parse_urlparse(url).netloc
    response_data = self._get_videokey_from_ticket(ticket)
    return compat_urllib_response.addinfourl(io.BytesIO(response_data), headers={
        'Content-Length': len(response_data),
    }, url=url, code=200)
149 class AbemaTVBaseIE(InfoExtractor
):
150 def _extract_breadcrumb_list(self
, webpage
, video_id
):
151 for jld
in re
.finditer(
152 r
'(?is)</span></li></ul><script[^>]+type=(["\']?
)application
/ld\
+json\
1[^
>]*>(?P
<json_ld
>.+?
)</script
>',
154 jsonld = self._parse_json(jld.group('json_ld
'), video_id, fatal=False)
156 if jsonld.get('@type') != 'BreadcrumbList
':
158 trav = traverse_obj(jsonld, ('itemListElement
', ..., 'name
'))
164 class AbemaTVIE(AbemaTVBaseIE):
165 _VALID_URL = r'https?
://abema\
.tv
/(?P
<type>now
-on
-air|video
/episode|channels
/.+?
/slots
)/(?P
<id>[^?
/]+)'
166 _NETRC_MACHINE = 'abematv
'
168 'url
': 'https
://abema
.tv
/video
/episode
/194-25_s
2_p
1',
170 'id': '194-25_s
2_p
1',
171 'title
': '第
1話 「チーズケーキ」 「モーニング再び」
',
174 'episode
': '第
1話 「チーズケーキ」 「モーニング再び」
',
179 'url
': 'https
://abema
.tv
/channels
/anime
-live2
/slots
/E8tvAnMJ7a9a5d
',
181 'id': 'E8tvAnMJ7a9a5d
',
182 'title
': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ
72時間】
',
183 'series
': 'ゆるキャン△ SEASON2
',
184 'episode
': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ
72時間】
',
187 'description
': 'md5
:9c5a3172ae763278f9303922f0ea5b17
',
191 'url
': 'https
://abema
.tv
/video
/episode
/87-877_s
1282_p
31047',
193 'id': 'E8tvAnMJ7a9a5d
',
195 'description
': 'md5
:56d4fc1b4f7769ded5f923c55bb4695d
',
196 'thumbnail
': r're
:https
://hayabusa\
.io
/.+',
198 'episode
': '第
5話『光射す』
',
202 'url
': 'https
://abema
.tv
/now
-on
-air
/abema
-anime
',
206 # 'title
': '女子高生の無駄づかい 全話一挙【無料ビデオ
72時間】
',
207 'description
': 'md5
:55f2e61f46a17e9230802d7bcc913d5f
',
210 'skip
': 'Not supported until yt
-dlp implements native live downloader OR AbemaTV can start a local HTTP server
',
217 _SECRETKEY = b'v
+Gjs
=25Aw5erR
!J8ZuvRrCx
*rGswhB
&qdHd_SYerEWdU
&a?
3DzN9BRbp5KwY4hEmcj5
#fykMjJ=AuWz5GSMY-d@H7DMEh3M@9n2G552Us$$k9cD=3TxwWe86!x#Zyhe'
219 def _generate_aks(self
, deviceid
):
220 deviceid
= deviceid
.encode('utf-8')
221 # add 1 hour and then drop minute and secs
222 ts_1hour
= int((time_seconds(hours
=9) // 3600 + 1) * 3600)
223 time_struct
= time
.gmtime(ts_1hour
)
224 ts_1hour_str
= str(ts_1hour
).encode('utf-8')
230 h
= hmac
.new(self
._SECRETKEY
, digestmod
=hashlib
.sha256
)
236 for i
in range(count
):
239 def mix_twist(nonce
):
241 mix_once(urlsafe_b64encode(tmp
).rstrip(b
'=') + nonce
)
243 mix_once(self
._SECRETKEY
)
244 mix_tmp(time_struct
.tm_mon
)
246 mix_tmp(time_struct
.tm_mday
% 5)
247 mix_twist(ts_1hour_str
)
248 mix_tmp(time_struct
.tm_hour
% 5)
250 return urlsafe_b64encode(tmp
).rstrip(b
'=').decode('utf-8')
252 def _get_device_token(self
):
254 return self
._USERTOKEN
256 self
._DEVICE
_ID
= random_uuidv4()
257 aks
= self
._generate
_aks
(self
._DEVICE
_ID
)
258 user_data
= self
._download
_json
(
259 'https://api.abema.io/v1/users', None, note
='Authorizing',
261 'deviceId': self
._DEVICE
_ID
,
262 'applicationKeySecret': aks
,
265 'Content-Type': 'application/json',
267 self
._USERTOKEN
= user_data
['token']
269 # don't allow adding it 2 times or more, though it's guarded
270 remove_opener(self
._downloader
, AbemaLicenseHandler
)
271 add_opener(self
._downloader
, AbemaLicenseHandler(self
))
273 return self
._USERTOKEN
275 def _get_media_token(self
, invalidate
=False, to_show
=True):
276 if not invalidate
and self
._MEDIATOKEN
:
277 return self
._MEDIATOKEN
279 self
._MEDIATOKEN
= self
._download
_json
(
280 'https://api.abema.io/v1/media/token', None, note
='Fetching media token' if to_show
else False,
283 'osVersion': '6.0.1',
285 'osTimezone': 'Asia/Tokyo',
287 'appVersion': '3.27.1'
289 'Authorization': 'bearer ' + self
._get
_device
_token
()
292 return self
._MEDIATOKEN
294 def _perform_login(self
, username
, password
):
295 if '@' in username
: # don't strictly check if it's email address or not
296 ep
, method
= 'user/email', 'email'
298 ep
, method
= 'oneTimePassword', 'userId'
300 login_response
= self
._download
_json
(
301 f
'https://api.abema.io/v1/auth/{ep}', None, note
='Logging in',
305 }).encode('utf-8'), headers
={
306 'Authorization': 'bearer ' + self
._get
_device
_token
(),
307 'Origin': 'https://abema.tv',
308 'Referer': 'https://abema.tv/',
309 'Content-Type': 'application/json',
312 self
._USERTOKEN
= login_response
['token']
313 self
._get
_media
_token
(True)
315 def _real_extract(self
, url
):
316 # starting download using infojson from this extractor is undefined behavior,
# and will never be fixed in the future; you must trigger downloads by directly specifying the URL.
318 # (unless there's a way to hook before downloading by extractor)
319 video_id
, video_type
= self
._match
_valid
_url
(url
).group('id', 'type')
321 'Authorization': 'Bearer ' + self
._get
_device
_token
(),
323 video_type
= video_type
.split('/')[-1]
325 webpage
= self
._download
_webpage
(url
, video_id
)
326 canonical_url
= self
._search
_regex
(
327 r
'<link\s+rel="canonical"\s*href="(.+?)"', webpage
, 'canonical URL',
329 info
= self
._search
_json
_ld
(webpage
, video_id
, default
={})
331 title
= self
._search
_regex
(
332 r
'<span\s*class=".+?EpisodeTitleBlock__title">(.+?)</span>', webpage
, 'title', default
=None)
335 for jld
in re
.finditer(
336 r
'(?is)<span\s*class="com-m-Thumbnail__image">(?:</span>)?<script[^>]+type=(["\']?
)application
/ld\
+json\
1[^
>]*>(?P
<json_ld
>.+?
)</script
>',
338 jsonld = self._parse_json(jld.group('json_ld
'), video_id, fatal=False)
342 title = jsonld.get('caption
')
343 if not title and video_type == 'now
-on
-air
':
344 if not self._TIMETABLE:
345 # cache the timetable because it goes to 5MiB in size (!!)
346 self._TIMETABLE = self._download_json(
347 'https
://api
.abema
.io
/v1
/timetable
/dataSet?debug
=false
', video_id,
349 now = time_seconds(hours=9)
350 for slot in self._TIMETABLE.get('slots
', []):
351 if slot.get('channelId
') != video_id:
353 if slot['startAt
'] <= now and now < slot['endAt
']:
354 title = slot['title
']
357 # read breadcrumb on top of page
358 breadcrumb = self._extract_breadcrumb_list(webpage, video_id)
360 # breadcrumb list translates to: (example is 1st test for this IE)
361 # Home > Anime (genre) > Isekai Shokudo 2 (series name) > Episode 1 "Cheese cakes" "Morning again" (episode title)
363 info['series
'] = breadcrumb[-2]
364 info['episode
'] = breadcrumb[-1]
366 title = info['episode
']
368 description = self._html_search_regex(
369 (r'<p\s
+class="com-video-EpisodeDetailsBlock__content"><span\s
+class=".+?">(.+?
)</span
></p
><div
',
370 r'<span\s
+class=".+?SlotSummary.+?">(.+?
)</span
></div
><div
',),
371 webpage, 'description
', default=None, group=1)
373 og_desc = self._html_search_meta(
374 ('description
', 'og
:description
', 'twitter
:description
'), webpage)
376 description = re.sub(r'''(?sx)
378 アニメの動画を無料で見るならABEMA!| # anime
379 等、.+ # applies for most of categories
383 # canonical URL may contain series and episode number
384 mobj = re.search(r's(\d
+)_p(\d
+)$
', canonical_url)
386 seri = int_or_none(mobj.group(1), default=float('inf
'))
387 epis = int_or_none(mobj.group(2), default=float('inf
'))
388 info['series_number
'] = seri if seri < 100 else None
389 # some anime like Detective Conan (though not available in AbemaTV)
390 # has more than 1000 episodes (1026 as of 2021/11/15)
391 info['episode_number
'] = epis if epis < 2000 else None
393 is_live, m3u8_url = False, None
394 if video_type == 'now
-on
-air
':
396 channel_url = 'https
://api
.abema
.io
/v1
/channels
'
397 if video_id == 'news
-global':
398 channel_url = update_url_query(channel_url, {'division': '1'})
399 onair_channels = self._download_json(channel_url, video_id)
400 for ch in onair_channels['channels
']:
401 if video_id == ch['id']:
402 m3u8_url = ch['playback
']['hls
']
405 raise ExtractorError(f'Cannot find on
-air {video_id} channel
.', expected=True)
406 elif video_type == 'episode
':
407 api_response = self._download_json(
408 f'https
://api
.abema
.io
/v1
/video
/programs
/{video_id}
', video_id,
409 note='Checking playability
',
411 ondemand_types = traverse_obj(api_response, ('terms
', ..., 'onDemandType
'), default=[])
412 if 3 not in ondemand_types:
413 # cannot acquire decryption key for these streams
414 self.report_warning('This
is a premium
-only stream
')
416 m3u8_url = f'https
://vod
-abematv
.akamaized
.net
/program
/{video_id}
/playlist
.m3u8
'
417 elif video_type == 'slots
':
418 api_response = self._download_json(
419 f'https
://api
.abema
.io
/v1
/media
/slots
/{video_id}
', video_id,
420 note='Checking playability
',
422 if not traverse_obj(api_response, ('slot
', 'flags
', 'timeshiftFree
'), default=False):
423 self.report_warning('This
is a premium
-only stream
')
425 m3u8_url = f'https
://vod
-abematv
.akamaized
.net
/slot
/{video_id}
/playlist
.m3u8
'
427 raise ExtractorError('Unreachable
')
430 self.report_warning("This is a livestream; yt-dlp doesn't support downloading natively
, but FFmpeg cannot handle m3u8 manifests
from AbemaTV
")
431 self.report_warning('Please consider using Streamlink to download these streams (https://github.com/streamlink/streamlink)')
432 formats = self._extract_m3u8_formats(
433 m3u8_url, video_id, ext='mp4', live=is_live)
438 'description': description,
445 class AbemaTVTitleIE(AbemaTVBaseIE):
446 _VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'
449 'url': 'https://abema.tv/video/title/90-1597',
452 'title': 'シャッフルアイランド',
454 'playlist_mincount': 2,
456 'url': 'https://abema.tv/video/title/193-132',
459 'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
461 'playlist_mincount': 16,
464 def _real_extract(self, url):
465 video_id = self._match_id(url)
466 webpage = self._download_webpage(url, video_id)
468 playlist_title, breadcrumb = None, self._extract_breadcrumb_list(webpage, video_id)
470 playlist_title = breadcrumb[-1]
473 self.url_result(urljoin('https://abema.tv/', mobj.group(1)))
474 for mobj in re.finditer(r'<li\s*class=".+?EpisodeList
.+?
"><a\s*href="(/[^
"]+?)"', webpage)]
476 return self.playlist_result(playlist, playlist_title=playlist_title, playlist_id=video_id)