[yt-dlp.git] / yt_dlp / extractor / abematv.py

import base64
import binascii
import functools
import hashlib
import hmac
import io
import json
import re
import struct
import time
import urllib.parse
import urllib.request
import urllib.response
import uuid

from .common import InfoExtractor
from ..aes import aes_ecb_decrypt
from ..utils import (
    ExtractorError,
    OnDemandPagedList,
    bytes_to_intlist,
    decode_base_n,
    int_or_none,
    intlist_to_bytes,
    time_seconds,
    traverse_obj,
    update_url_query,
)
from ..utils.networking import clean_proxies


def add_opener(ydl, handler):  # FIXME: Create proper API in .networking
    """Attach `handler` to the opener behind the downloader's 'Urllib' request handler.

    The 'abematv-license' scheme is appended to that request handler's supported
    schemes afterwards; when it is already listed the function returns without
    doing anything.
    """
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
    license_scheme = 'abematv-license'
    urllib_rh = ydl._request_director.handlers['Urllib']
    if license_scheme in urllib_rh._SUPPORTED_URL_SCHEMES:
        return

    # operate on copies rather than on the downloader's own dicts
    request_headers = ydl.params['http_headers'].copy()
    proxy_map = ydl.proxies.copy()
    clean_proxies(proxy_map, request_headers)

    director = urllib_rh._get_instance(cookiejar=ydl.cookiejar, proxies=proxy_map)
    assert isinstance(director, urllib.request.OpenerDirector)
    director.add_handler(handler)
    urllib_rh._SUPPORTED_URL_SCHEMES = (*urllib_rh._SUPPORTED_URL_SCHEMES, license_scheme)


class AbemaLicenseHandler(urllib.request.BaseHandler):
    """urllib handler answering 'abematv-license://<ticket>' URLs with the decrypted video key."""

    handler_order = 499
    STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
    HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'

    def __init__(self, ie: 'AbemaTVIE'):
        """Remember the extractor `ie` and expose the opener method under the hyphenated name."""
        self.ie = ie
        # the protocol that this should really handle is 'abematv-license://'
        # abematv_license_open is just a placeholder for development purposes
        # ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
        opener_method = getattr(self, 'abematv_license_open', None)
        setattr(self, 'abematv-license_open', opener_method)

    def _get_videokey_from_ticket(self, ticket):
        """Request the playback license for `ticket` and return the decrypted video key as bytes."""
        verbose = self.ie.get_param('verbose', False)
        media_token = self.ie._get_media_token(to_show=verbose)

        payload = json.dumps({
            'kv': 'a',
            'lt': ticket
        }).encode('utf-8')
        license_response = self.ie._download_json(
            'https://license.abema.io/abematv-hls', None,
            note='Requesting playback license' if verbose else False,
            query={'t': media_token}, data=payload,
            headers={'Content-Type': 'application/json'})

        # 'k' is decoded using STRTABLE as the alphabet, then laid out as two
        # big-endian unsigned 64-bit words
        key_number = decode_base_n(license_response['k'], table=self.STRTABLE)
        high_word, low_word = key_number >> 64, key_number & 0xffffffffffffffff
        encrypted_key = bytes_to_intlist(struct.pack('>QQ', high_word, low_word))

        # the AES key is HMAC-SHA256(HKEY, cid + device ID)
        hmac_message = (license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8')
        mac = hmac.new(binascii.unhexlify(self.HKEY), hmac_message, digestmod=hashlib.sha256)
        aes_key = bytes_to_intlist(mac.digest())

        return intlist_to_bytes(aes_ecb_decrypt(encrypted_key, aes_key))

    def abematv_license_open(self, url):
        """Return a 200 response whose body is the video key for the ticket in the netloc of `url`.

        `url` may be a plain string or a `urllib.request.Request`.
        """
        if isinstance(url, urllib.request.Request):
            url = url.get_full_url()
        ticket = urllib.parse.urlparse(url).netloc
        video_key = self._get_videokey_from_ticket(ticket)
        response_headers = {
            'Content-Length': str(len(video_key)),
        }
        return urllib.response.addinfourl(
            io.BytesIO(video_key), headers=response_headers, url=url, code=200)


class AbemaTVBaseIE(InfoExtractor):
    """Shared authentication and API helpers for the AbemaTV extractors.

    The user token, device ID and media token are assigned on ``AbemaTVBaseIE``
    itself, so all subclasses and instances see the same values.
    """
    _NETRC_MACHINE = 'abematv'

    _USERTOKEN = None
    _DEVICE_ID = None
    _MEDIATOKEN = None

    _SECRETKEY = b'v+Gjs=25Aw5erR!J8ZuvRrCx*rGswhB&qdHd_SYerEWdU&a?3DzN9BRbp5KwY4hEmcj5#fykMjJ=AuWz5GSMY-d@H7DMEh3M@9n2G552Us$$k9cD=3TxwWe86!x#Zyhe'

    @classmethod
    def _generate_aks(cls, deviceid):
        """Derive the ``applicationKeySecret`` string sent when registering `deviceid`.

        The value is a chain of HMAC-SHA256 digests keyed with ``_SECRETKEY``; the
        number of rounds depends on the month, day and hour (UTC) of the next full
        hour, and the device ID and that timestamp are mixed in along the way.
        Returns the final digest as unpadded URL-safe base64 text.
        """
        deviceid = deviceid.encode('utf-8')
        # add 1 hour and then drop minute and secs
        ts_1hour = int((time_seconds() // 3600 + 1) * 3600)
        time_struct = time.gmtime(ts_1hour)
        ts_1hour_str = str(ts_1hour).encode('utf-8')

        tmp = None

        def mix_once(nonce):
            # replace the running digest with HMAC(_SECRETKEY, nonce)
            nonlocal tmp
            h = hmac.new(cls._SECRETKEY, digestmod=hashlib.sha256)
            h.update(nonce)
            tmp = h.digest()

        def mix_tmp(count):
            # feed the running digest back into itself `count` times
            for _ in range(count):
                mix_once(tmp)

        def mix_twist(nonce):
            # mix `nonce` in, prefixed by the unpadded base64 of the running digest
            mix_once(base64.urlsafe_b64encode(tmp).rstrip(b'=') + nonce)

        mix_once(cls._SECRETKEY)
        mix_tmp(time_struct.tm_mon)
        mix_twist(deviceid)
        mix_tmp(time_struct.tm_mday % 5)
        mix_twist(ts_1hour_str)
        mix_tmp(time_struct.tm_hour % 5)

        return base64.urlsafe_b64encode(tmp).rstrip(b'=').decode('utf-8')

    def _get_device_token(self):
        """Return the user token, obtaining one first if none is set yet.

        On the first call the license URL handler is installed on the downloader.
        A token cached for the configured username is tried first; if fetching a
        media token with it fails, a warning is reported and a new device ID is
        registered to obtain a fresh token.
        """
        if self._USERTOKEN:
            return self._USERTOKEN

        add_opener(self._downloader, AbemaLicenseHandler(self))

        username, _ = self._get_login_info()
        auth_cache = username and self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19')
        AbemaTVBaseIE._USERTOKEN = auth_cache and auth_cache.get('usertoken')
        if AbemaTVBaseIE._USERTOKEN:
            # try authentication with locally stored token
            try:
                AbemaTVBaseIE._DEVICE_ID = auth_cache.get('device_id')
                self._get_media_token(True)
                # callers interpolate the result into an Authorization header,
                # so hand back the token rather than None
                return self._USERTOKEN
            except ExtractorError as e:
                self.report_warning(f'Failed to login with cached user token; obtaining a fresh one ({e})')

        AbemaTVBaseIE._DEVICE_ID = str(uuid.uuid4())
        aks = self._generate_aks(self._DEVICE_ID)
        user_data = self._download_json(
            'https://api.abema.io/v1/users', None, note='Authorizing',
            data=json.dumps({
                'deviceId': self._DEVICE_ID,
                'applicationKeySecret': aks,
            }).encode('utf-8'),
            headers={
                'Content-Type': 'application/json',
            })
        AbemaTVBaseIE._USERTOKEN = user_data['token']

        return self._USERTOKEN

    def _get_media_token(self, invalidate=False, to_show=True):
        """Return the media token, requesting a new one when needed.

        A new token is requested when none is stored or when `invalidate` is true.
        When `to_show` is false the request is made with ``note=False``.
        """
        if not invalidate and self._MEDIATOKEN:
            return self._MEDIATOKEN

        AbemaTVBaseIE._MEDIATOKEN = self._download_json(
            'https://api.abema.io/v1/media/token', None, note='Fetching media token' if to_show else False,
            query={
                'osName': 'android',
                'osVersion': '6.0.1',
                'osLang': 'ja_JP',
                'osTimezone': 'Asia/Tokyo',
                'appId': 'tv.abema',
                'appVersion': '3.27.1'
            }, headers={
                'Authorization': f'bearer {self._get_device_token()}',
            })['token']

        return self._MEDIATOKEN

    def _perform_login(self, username, password):
        """Log in with `username` and `password` and cache the resulting credentials.

        The login request is skipped when a cache entry exists for `username` and a
        media token is available. Otherwise the new user token and the device ID
        are stored in the cache under `username`.
        """
        self._get_device_token()
        if self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19') and self._get_media_token():
            self.write_debug('Skipping logging in')
            return

        if '@' in username:  # don't strictly check if it's email address or not
            ep, method = 'user/email', 'email'
        else:
            ep, method = 'oneTimePassword', 'userId'

        login_response = self._download_json(
            f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
            data=json.dumps({
                method: username,
                'password': password
            }).encode('utf-8'), headers={
                'Authorization': f'bearer {self._get_device_token()}',
                'Origin': 'https://abema.tv',
                'Referer': 'https://abema.tv/',
                'Content-Type': 'application/json',
            })

        AbemaTVBaseIE._USERTOKEN = login_response['token']
        # the media token has to be re-issued for the new user token
        self._get_media_token(True)
        auth_cache = {
            'device_id': AbemaTVBaseIE._DEVICE_ID,
            'usertoken': AbemaTVBaseIE._USERTOKEN,
        }
        self.cache.store(self._NETRC_MACHINE, username, auth_cache)

    def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'):
        """Download JSON from ``https://api.abema.io/<endpoint>`` with the bearer token attached."""
        return self._download_json(
            f'https://api.abema.io/{endpoint}', video_id, query=query or {},
            note=note,
            headers={
                'Authorization': f'bearer {self._get_device_token()}',
            })

    def _extract_breadcrumb_list(self, webpage, video_id):
        """Return the breadcrumb names found in `webpage`, or an empty list.

        Only JSON-LD scripts directly following ``</span></li></ul>`` are examined;
        the names come from the first one whose ``@type`` is ``BreadcrumbList`` and
        whose ``itemListElement`` entries yield at least one ``name``.
        """
        for jld in re.finditer(
                r'(?is)</span></li></ul><script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
                webpage):
            jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
            if traverse_obj(jsonld, '@type') != 'BreadcrumbList':
                continue
            items = traverse_obj(jsonld, ('itemListElement', ..., 'name'))
            if items:
                return items
        return []


class AbemaTVIE(AbemaTVBaseIE):
    """Extractor for AbemaTV now-on-air channels, on-demand episodes and channel slots."""
    _VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
    _TESTS = [{
        'url': 'https://abema.tv/video/episode/194-25_s2_p1',
        'info_dict': {
            'id': '194-25_s2_p1',
            'title': '第1話 「チーズケーキ」　「モーニング再び」',
            'series': '異世界食堂２',
            'season': 'シーズン2',
            'season_number': 2,
            'episode': '第1話 「チーズケーキ」　「モーニング再び」',
            'episode_number': 1,
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/channels/anime-live2/slots/E8tvAnMJ7a9a5d',
        'info_dict': {
            'id': 'E8tvAnMJ7a9a5d',
            'title': 'ゆるキャン△ SEASON２ 全話一挙【無料ビデオ72時間】',
            'series': 'ゆるキャン△ SEASON２',
            'episode': 'ゆるキャン△ SEASON２ 全話一挙【無料ビデオ72時間】',
            'season_number': 2,
            'episode_number': 1,
            'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/video/episode/87-877_s1282_p31047',
        'info_dict': {
            # the returned id is always the id taken from the URL
            'id': '87-877_s1282_p31047',
            'title': '第5話『光射す』',
            'description': 'md5:56d4fc1b4f7769ded5f923c55bb4695d',
            'thumbnail': r're:https://hayabusa\.io/.+',
            'series': '相棒',
            'episode': '第5話『光射す』',
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/now-on-air/abema-anime',
        'info_dict': {
            'id': 'abema-anime',
            # this varies
            # 'title': '女子高生の無駄づかい 全話一挙【無料ビデオ72時間】',
            'description': 'md5:55f2e61f46a17e9230802d7bcc913d5f',
            'is_live': True,
        },
        'skip': 'Not supported until yt-dlp implements native live downloader OR AbemaTV can start a local HTTP server',
    }]
    _TIMETABLE = None

    def _real_extract(self, url):
        """Build the info dict for a now-on-air channel, an episode or a slot URL.

        Title and description are taken from the webpage where possible, with the
        timetable (live only), the breadcrumb list and the API response as
        fallbacks. Raises ExtractorError when a now-on-air channel is not listed
        by the channels API.
        """
        # starting download using infojson from this extractor is undefined behavior,
        # and never be fixed in the future; you must trigger downloads by directly specifying URL.
        # (unless there's a way to hook before downloading by extractor)
        video_id, video_type = self._match_valid_url(url).group('id', 'type')
        headers = {
            'Authorization': 'Bearer ' + self._get_device_token(),
        }
        # reduce 'video/episode' and 'channels/<name>/slots' to their last path component
        video_type = video_type.split('/')[-1]

        webpage = self._download_webpage(url, video_id)
        canonical_url = self._search_regex(
            r'<link\s+rel="canonical"\s*href="(.+?)"', webpage, 'canonical URL',
            default=url)
        info = self._search_json_ld(webpage, video_id, default={})

        title = self._search_regex(
            r'<span\s*class=".+?EpisodeTitleBlock__title">(.+?)</span>', webpage, 'title', default=None)
        if not title:
            # fall back to the caption of the first parseable JSON-LD next to the thumbnail
            jsonld = None
            for jld in re.finditer(
                    r'(?is)<span\s*class="com-m-Thumbnail__image">(?:</span>)?<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
                    webpage):
                jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
                if jsonld:
                    break
            if jsonld:
                title = jsonld.get('caption')
        if not title and video_type == 'now-on-air':
            if not self._TIMETABLE:
                # cache the timetable because it goes to 5MiB in size (!!)
                self._TIMETABLE = self._download_json(
                    'https://api.abema.io/v1/timetable/dataSet?debug=false', video_id,
                    headers=headers)
            now = time_seconds(hours=9)
            for slot in self._TIMETABLE.get('slots', []):
                if slot.get('channelId') != video_id:
                    continue
                if slot['startAt'] <= now < slot['endAt']:
                    title = slot['title']
                    break

        # read breadcrumb on top of page
        breadcrumb = self._extract_breadcrumb_list(webpage, video_id)
        if breadcrumb:
            # breadcrumb list translates to: (e.g. 1st test for this IE)
            # Home > Anime (genre) > Isekai Shokudo 2 (series name) > Episode 1 "Cheese cakes" "Morning again" (episode title)
            # hence this works
            if len(breadcrumb) >= 2:
                # a single-entry list has no series component; indexing [-2] would raise IndexError
                info['series'] = breadcrumb[-2]
            info['episode'] = breadcrumb[-1]
            if not title:
                title = info['episode']

        description = self._html_search_regex(
            (r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
             r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',),
            webpage, 'description', default=None, group=1)
        if not description:
            og_desc = self._html_search_meta(
                ('description', 'og:description', 'twitter:description'), webpage)
            if og_desc:
                # NOTE(review): the pattern has no trailing `$`, so the lazy `(.+?)` is
                # satisfied by the first character and the optional suffix group then
                # matches nothing; unless the boilerplate starts at index 1 this
                # substitution leaves og_desc unchanged. Confirm the intended stripping
                # before anchoring it.
                description = re.sub(r'''(?sx)
                    ^(.+?)(?:
                        アニメの動画を無料で見るならABEMA！| # anime
                        等、.+ # applies for most of categories
                    )?
                ''', r'\1', og_desc)

        # canonical URL may contain season and episode number
        mobj = re.search(r's(\d+)_p(\d+)$', canonical_url)
        if mobj:
            seri = int_or_none(mobj.group(1), default=float('inf'))
            epis = int_or_none(mobj.group(2), default=float('inf'))
            info['season_number'] = seri if seri < 100 else None
            # some anime like Detective Conan (though not available in AbemaTV)
            # has more than 1000 episodes (1026 as of 2021/11/15)
            info['episode_number'] = epis if epis < 2000 else None

        is_live, m3u8_url = False, None
        if video_type == 'now-on-air':
            is_live = True
            channel_url = 'https://api.abema.io/v1/channels'
            if video_id == 'news-global':
                channel_url = update_url_query(channel_url, {'division': '1'})
            onair_channels = self._download_json(channel_url, video_id)
            for ch in onair_channels['channels']:
                if video_id == ch['id']:
                    m3u8_url = ch['playback']['hls']
                    break
            else:
                raise ExtractorError(f'Cannot find on-air {video_id} channel.', expected=True)
        elif video_type == 'episode':
            api_response = self._download_json(
                f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
                note='Checking playability',
                headers=headers)
            ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'))
            if 3 not in ondemand_types:
                # cannot acquire decryption key for these streams
                self.report_warning('This is a premium-only stream')
            # values from the API take precedence over those guessed from the page
            info.update(traverse_obj(api_response, {
                'series': ('series', 'title'),
                'season': ('season', 'name'),
                'season_number': ('season', 'sequence'),
                'episode_number': ('episode', 'number'),
            }))
            if not title:
                title = traverse_obj(api_response, ('episode', 'title'))
            if not description:
                description = traverse_obj(api_response, ('episode', 'content'))

            m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
        elif video_type == 'slots':
            api_response = self._download_json(
                f'https://api.abema.io/v1/media/slots/{video_id}', video_id,
                note='Checking playability',
                headers=headers)
            if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
                self.report_warning('This is a premium-only stream')

            m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
        else:
            raise ExtractorError('Unreachable')

        if is_live:
            self.report_warning("This is a livestream; yt-dlp doesn't support downloading natively, but FFmpeg cannot handle m3u8 manifests from AbemaTV")
            self.report_warning('Please consider using Streamlink to download these streams (https://github.com/streamlink/streamlink)')
        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, ext='mp4', live=is_live)

        info.update({
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'is_live': is_live,
        })
        return info


class AbemaTVTitleIE(AbemaTVBaseIE):
    """Playlist extractor for ``abema.tv/video/title/<id>`` series pages."""
    _VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'
    _PAGE_SIZE = 25

    _TESTS = [{
        'url': 'https://abema.tv/video/title/90-1597',
        'info_dict': {
            'id': '90-1597',
            'title': 'シャッフルアイランド',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://abema.tv/video/title/193-132',
        'info_dict': {
            'id': '193-132',
            'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
        },
        'playlist_mincount': 16,
    }, {
        'url': 'https://abema.tv/video/title/25-102',
        'info_dict': {
            'id': '25-102',
            'title': 'ソードアート・オンライン アリシゼーション',
        },
        'playlist_mincount': 24,
    }]

    def _fetch_page(self, playlist_id, series_version, page):
        """Yield an episode URL result for every program ID on zero-based `page`."""
        page_query = {
            'seriesVersion': series_version,
            'offset': str(page * self._PAGE_SIZE),
            'order': 'seq',
            'limit': str(self._PAGE_SIZE),
        }
        response = self._call_api(
            f'v1/video/series/{playlist_id}/programs', playlist_id,
            note=f'Downloading page {page + 1}', query=page_query)
        for program_id in traverse_obj(response, ('programs', ..., 'id')):
            yield self.url_result(f'https://abema.tv/video/episode/{program_id}')

    def _entries(self, playlist_id, series_version):
        """Return an on-demand paged list that fetches `_PAGE_SIZE` programs per page."""
        page_fetcher = functools.partial(self._fetch_page, playlist_id, series_version)
        return OnDemandPagedList(page_fetcher, self._PAGE_SIZE)

    def _real_extract(self, url):
        """Return a playlist of the series' episodes, titled and described from the series API."""
        playlist_id = self._match_id(url)
        series_info = self._call_api(f'v1/video/series/{playlist_id}', playlist_id)
        entries = self._entries(playlist_id, series_info['version'])

        return self.playlist_result(
            entries, playlist_id=playlist_id,
            playlist_title=series_info.get('title'),
            playlist_description=series_info.get('content'))
Commit	Line	Data
f8271158	1	import base64
f8271158	2	import binascii
bc83b4b0	3	import functools
3e9b66d7 LNO	4	import hashlib
3e9b66d7 LNO	5	import hmac
f8271158	6	import io
f8271158	7	import json
3e9b66d7 LNO	8	import re
3e9b66d7 LNO	9	import struct
f8271158	10	import time
14f25df2	11	import urllib.parse
ac668111	12	import urllib.request
f9934b96	13	import urllib.response
f9934b96	14	import uuid
e897bd82	15
3e9b66d7 LNO	16	from .common import InfoExtractor
3e9b66d7 LNO	17	from ..aes import aes_ecb_decrypt
3e9b66d7 LNO	18	from ..utils import (
3e9b66d7 LNO	19	ExtractorError,
e897bd82	20	OnDemandPagedList,
f8271158	21	bytes_to_intlist,
7b2c3f47	22	decode_base_n,
3e9b66d7	23	int_or_none,
f8271158	24	intlist_to_bytes,
3e9b66d7	25	time_seconds,
3e9b66d7	26	traverse_obj,
f8271158	27	update_url_query,
3e9b66d7	28	)
e897bd82	29	from ..utils.networking import clean_proxies
3e9b66d7	30
3e9b66d7	31
9f662472	32	def add_opener(ydl, handler): # FIXME: Create proper API in .networking
9f662472	33	"""Add a handler for opening URLs, like _download_webpage"""
3e9b66d7 LNO	34	# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
3e9b66d7 LNO	35	# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
9f662472	36	rh = ydl._request_director.handlers['Urllib']
	37	if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
	38	return
497bbbbd S	39	headers = ydl.params['http_headers'].copy()
	40	proxies = ydl.proxies.copy()
	41	clean_proxies(proxies, headers)
	42	opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=proxies)
9f662472	43	assert isinstance(opener, urllib.request.OpenerDirector)
	44	opener.add_handler(handler)
	45	rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')
3e9b66d7 LNO	46
3e9b66d7 LNO	47
ac668111	48	class AbemaLicenseHandler(urllib.request.BaseHandler):
3e9b66d7 LNO	49	handler_order = 499
	50	STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
	51	HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
	52
	53	def __init__(self, ie: 'AbemaTVIE'):
962ffcf8	54	# the protocol that this should really handle is 'abematv-license://'
3e9b66d7 LNO	55	# abematv_license_open is just a placeholder for development purposes
3e9b66d7 LNO	56	# ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
93240fc1	57	setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open', None))
3e9b66d7 LNO	58	self.ie = ie
	59
	60	def _get_videokey_from_ticket(self, ticket):
9809740b	61	to_show = self.ie.get_param('verbose', False)
3e9b66d7 LNO	62	media_token = self.ie._get_media_token(to_show=to_show)
	63
	64	license_response = self.ie._download_json(
	65	'https://license.abema.io/abematv-hls', None, note='Requesting playback license' if to_show else False,
	66	query={'t': media_token},
	67	data=json.dumps({
	68	'kv': 'a',
	69	'lt': ticket
	70	}).encode('utf-8'),
	71	headers={
	72	'Content-Type': 'application/json',
	73	})
	74
7b2c3f47	75	res = decode_base_n(license_response['k'], table=self.STRTABLE)
3e9b66d7 LNO	76	encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
	77
	78	h = hmac.new(
f8271158	79	binascii.unhexlify(self.HKEY),
3e9b66d7 LNO	80	(license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'),
	81	digestmod=hashlib.sha256)
	82	enckey = bytes_to_intlist(h.digest())
	83
	84	return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
	85
	86	def abematv_license_open(self, url):
3d2623a8	87	url = url.get_full_url() if isinstance(url, urllib.request.Request) else url
14f25df2	88	ticket = urllib.parse.urlparse(url).netloc
3e9b66d7	89	response_data = self._get_videokey_from_ticket(ticket)
f9934b96	90	return urllib.response.addinfourl(io.BytesIO(response_data), headers={
9f662472	91	'Content-Length': str(len(response_data)),
3e9b66d7 LNO	92	}, url=url, code=200)
	93
	94
	95	class AbemaTVBaseIE(InfoExtractor):
8226a381	96	_NETRC_MACHINE = 'abematv'
8226a381	97
3e9b66d7 LNO	98	_USERTOKEN = None
3e9b66d7 LNO	99	_DEVICE_ID = None
3e9b66d7 LNO	100	_MEDIATOKEN = None
	101
	102	_SECRETKEY = b'v+Gjs=25Aw5erR!J8ZuvRrCx*rGswhB&qdHd_SYerEWdU&a?3DzN9BRbp5KwY4hEmcj5#fykMjJ=AuWz5GSMY-d@H7DMEh3M@9n2G552Us$$k9cD=3TxwWe86!x#Zyhe'
	103
bc83b4b0 L	104	@classmethod
bc83b4b0 L	105	def _generate_aks(cls, deviceid):
3e9b66d7 LNO	106	deviceid = deviceid.encode('utf-8')
3e9b66d7 LNO	107	# add 1 hour and then drop minute and secs
a4f16832	108	ts_1hour = int((time_seconds() // 3600 + 1) * 3600)
3e9b66d7 LNO	109	time_struct = time.gmtime(ts_1hour)
	110	ts_1hour_str = str(ts_1hour).encode('utf-8')
	111
	112	tmp = None
	113
	114	def mix_once(nonce):
	115	nonlocal tmp
bc83b4b0	116	h = hmac.new(cls._SECRETKEY, digestmod=hashlib.sha256)
3e9b66d7 LNO	117	h.update(nonce)
	118	tmp = h.digest()
	119
	120	def mix_tmp(count):
	121	nonlocal tmp
	122	for i in range(count):
	123	mix_once(tmp)
	124
	125	def mix_twist(nonce):
	126	nonlocal tmp
f8271158	127	mix_once(base64.urlsafe_b64encode(tmp).rstrip(b'=') + nonce)
3e9b66d7	128
bc83b4b0	129	mix_once(cls._SECRETKEY)
3e9b66d7 LNO	130	mix_tmp(time_struct.tm_mon)
	131	mix_twist(deviceid)
	132	mix_tmp(time_struct.tm_mday % 5)
	133	mix_twist(ts_1hour_str)
	134	mix_tmp(time_struct.tm_hour % 5)
	135
f8271158	136	return base64.urlsafe_b64encode(tmp).rstrip(b'=').decode('utf-8')
3e9b66d7 LNO	137
	138	def _get_device_token(self):
	139	if self._USERTOKEN:
	140	return self._USERTOKEN
	141
c51316f8	142	add_opener(self._downloader, AbemaLicenseHandler(self))
c51316f8	143
a4f16832	144	username, _ = self._get_login_info()
c51316f8	145	auth_cache = username and self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19')
c51316f8	146	AbemaTVBaseIE._USERTOKEN = auth_cache and auth_cache.get('usertoken')
a4f16832 L	147	if AbemaTVBaseIE._USERTOKEN:
	148	# try authentication with locally stored token
	149	try:
c51316f8	150	AbemaTVBaseIE._DEVICE_ID = auth_cache.get('device_id')
a4f16832 L	151	self._get_media_token(True)
	152	return
	153	except ExtractorError as e:
	154	self.report_warning(f'Failed to login with cached user token; obtaining a fresh one ({e})')
	155
bc83b4b0	156	AbemaTVBaseIE._DEVICE_ID = str(uuid.uuid4())
3e9b66d7 LNO	157	aks = self._generate_aks(self._DEVICE_ID)
	158	user_data = self._download_json(
	159	'https://api.abema.io/v1/users', None, note='Authorizing',
	160	data=json.dumps({
	161	'deviceId': self._DEVICE_ID,
	162	'applicationKeySecret': aks,
	163	}).encode('utf-8'),
	164	headers={
	165	'Content-Type': 'application/json',
	166	})
bc83b4b0	167	AbemaTVBaseIE._USERTOKEN = user_data['token']
3e9b66d7	168
3e9b66d7 LNO	169	return self._USERTOKEN
	170
	171	def _get_media_token(self, invalidate=False, to_show=True):
	172	if not invalidate and self._MEDIATOKEN:
	173	return self._MEDIATOKEN
	174
bc83b4b0	175	AbemaTVBaseIE._MEDIATOKEN = self._download_json(
3e9b66d7 LNO	176	'https://api.abema.io/v1/media/token', None, note='Fetching media token' if to_show else False,
	177	query={
	178	'osName': 'android',
	179	'osVersion': '6.0.1',
	180	'osLang': 'ja_JP',
	181	'osTimezone': 'Asia/Tokyo',
	182	'appId': 'tv.abema',
	183	'appVersion': '3.27.1'
	184	}, headers={
bc83b4b0	185	'Authorization': f'bearer {self._get_device_token()}',
3e9b66d7 LNO	186	})['token']
	187
	188	return self._MEDIATOKEN
	189
8226a381	190	def _perform_login(self, username, password):
	191	self._get_device_token()
	192	if self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19') and self._get_media_token():
	193	self.write_debug('Skipping logging in')
	194	return
	195
	196	if '@' in username: # don't strictly check if it's email address or not
	197	ep, method = 'user/email', 'email'
	198	else:
	199	ep, method = 'oneTimePassword', 'userId'
	200
	201	login_response = self._download_json(
	202	f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
	203	data=json.dumps({
	204	method: username,
	205	'password': password
	206	}).encode('utf-8'), headers={
	207	'Authorization': f'bearer {self._get_device_token()}',
	208	'Origin': 'https://abema.tv',
	209	'Referer': 'https://abema.tv/',
	210	'Content-Type': 'application/json',
	211	})
	212
	213	AbemaTVBaseIE._USERTOKEN = login_response['token']
	214	self._get_media_token(True)
	215	auth_cache = {
	216	'device_id': AbemaTVBaseIE._DEVICE_ID,
	217	'usertoken': AbemaTVBaseIE._USERTOKEN,
	218	}
	219	self.cache.store(self._NETRC_MACHINE, username, auth_cache)
	220
bc83b4b0 L	221	def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'):
	222	return self._download_json(
	223	f'https://api.abema.io/{endpoint}', video_id, query=query or {},
	224	note=note,
	225	headers={
	226	'Authorization': f'bearer {self._get_device_token()}',
	227	})
	228
	229	def _extract_breadcrumb_list(self, webpage, video_id):
	230	for jld in re.finditer(
	231	r'(?is)</span></li></ul><script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
	232	webpage):
	233	jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
	234	if traverse_obj(jsonld, '@type') != 'BreadcrumbList':
	235	continue
	236	items = traverse_obj(jsonld, ('itemListElement', ..., 'name'))
	237	if items:
	238	return items
	239	return []
	240
	241
	242	class AbemaTVIE(AbemaTVBaseIE):
	243	_VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air\|video/episode\|channels/.+?/slots)/(?P<id>[^?/]+)'
bc83b4b0 L	244	_TESTS = [{
	245	'url': 'https://abema.tv/video/episode/194-25_s2_p1',
	246	'info_dict': {
	247	'id': '194-25_s2_p1',
	248	'title': '第1話「チーズケーキ」　「モーニング再び」',
	249	'series': '異世界食堂２',
cc07f5cc	250	'season': 'シーズン2',
cc07f5cc	251	'season_number': 2,
bc83b4b0 L	252	'episode': '第1話「チーズケーキ」　「モーニング再び」',
	253	'episode_number': 1,
	254	},
	255	'skip': 'expired',
	256	}, {
	257	'url': 'https://abema.tv/channels/anime-live2/slots/E8tvAnMJ7a9a5d',
	258	'info_dict': {
	259	'id': 'E8tvAnMJ7a9a5d',
	260	'title': 'ゆるキャン△ SEASON２全話一挙【無料ビデオ72時間】',
	261	'series': 'ゆるキャン△ SEASON２',
	262	'episode': 'ゆるキャン△ SEASON２全話一挙【無料ビデオ72時間】',
f4f9f6d0	263	'season_number': 2,
bc83b4b0 L	264	'episode_number': 1,
	265	'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
	266	},
	267	'skip': 'expired',
	268	}, {
	269	'url': 'https://abema.tv/video/episode/87-877_s1282_p31047',
	270	'info_dict': {
	271	'id': 'E8tvAnMJ7a9a5d',
	272	'title': '第5話『光射す』',
	273	'description': 'md5:56d4fc1b4f7769ded5f923c55bb4695d',
	274	'thumbnail': r're:https://hayabusa\.io/.+',
	275	'series': '相棒',
	276	'episode': '第5話『光射す』',
	277	},
	278	'skip': 'expired',
	279	}, {
	280	'url': 'https://abema.tv/now-on-air/abema-anime',
	281	'info_dict': {
	282	'id': 'abema-anime',
	283	# this varies
	284	# 'title': '女子高生の無駄づかい全話一挙【無料ビデオ72時間】',
	285	'description': 'md5:55f2e61f46a17e9230802d7bcc913d5f',
	286	'is_live': True,
	287	},
	288	'skip': 'Not supported until yt-dlp implements native live downloader OR AbemaTV can start a local HTTP server',
	289	}]
	290	_TIMETABLE = None
	291
def _real_extract(self, url):
    """Extract metadata and HLS formats for a single AbemaTV page.

    The last path component of the matched ``type`` group selects one of
    three code paths: ``now-on-air`` (live channel), ``episode`` and
    ``slots``.

    Returns the info dict seeded from the page's JSON-LD and completed with
    ``id``, ``title``, ``description``, ``formats`` and ``is_live``.

    Raises ExtractorError when a live channel is missing from the channel
    list, or when the URL type is none of the three handled above.
    """
    # starting download using infojson from this extractor is undefined behavior,
    # and will never be fixed in the future; you must trigger downloads by directly specifying URL.
    # (unless there's a way to hook before downloading by extractor)
    video_id, video_type = self._match_valid_url(url).group('id', 'type')
    headers = {
        'Authorization': 'Bearer ' + self._get_device_token(),
    }
    # only the last path component of the matched type selects the branch below
    video_type = video_type.split('/')[-1]

    webpage = self._download_webpage(url, video_id)
    canonical_url = self._search_regex(
        r'<link\s+rel="canonical"\s*href="(.+?)"', webpage, 'canonical URL',
        default=url)
    info = self._search_json_ld(webpage, video_id, default={})

    # Title lookup, in order: episode title block, thumbnail JSON-LD caption,
    # timetable slot (live only), and finally the breadcrumb further below.
    title = self._search_regex(
        r'<span\s*class=".+?EpisodeTitleBlock__title">(.+?)</span>', webpage, 'title', default=None)
    if not title:
        jsonld = None
        # `[^>]*>` lets the <script> tag close right after the type attribute
        # (or after any number of further attributes)
        for jld in re.finditer(
                r'(?is)<span\s*class="com-m-Thumbnail__image">(?:</span>)?<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
                webpage):
            jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
            if jsonld:
                break
        if jsonld:
            title = jsonld.get('caption')
    if not title and video_type == 'now-on-air':
        if not self._TIMETABLE:
            # cache the timetable because it goes to 5MiB in size (!!)
            self._TIMETABLE = self._download_json(
                'https://api.abema.io/v1/timetable/dataSet?debug=false', video_id,
                headers=headers)
        now = time_seconds(hours=9)
        # pick the slot of this channel that is on air right now
        for slot in self._TIMETABLE.get('slots', []):
            if slot.get('channelId') != video_id:
                continue
            if slot['startAt'] <= now < slot['endAt']:
                title = slot['title']
                break

    # read breadcrumb on top of page
    breadcrumb = self._extract_breadcrumb_list(webpage, video_id)
    if breadcrumb:
        # breadcrumb list translates to: (e.g. 1st test for this IE)
        # Home > Anime (genre) > Isekai Shokudo 2 (series name) > Episode 1 "Cheese cakes" "Morning again" (episode title)
        # hence this works
        info['series'] = breadcrumb[-2]
        info['episode'] = breadcrumb[-1]
        if not title:
            title = info['episode']

    description = self._html_search_regex(
        (r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
         r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',),
        webpage, 'description', default=None, group=1)
    if not description:
        og_desc = self._html_search_meta(
            ('description', 'og:description', 'twitter:description'), webpage)
        if og_desc:
            # The two boilerplate suffixes are alternatives, so the pipe must
            # stay unescaped inside the verbose pattern.
            # NOTE(review): the pattern has no end anchor, so the lazy group
            # stops after one character unless a suffix starts right there;
            # confirm whether a trailing `$` was intended before tightening.
            description = re.sub(r'''(?sx)
                ^(.+?)(?:
                    アニメの動画を無料で見るならABEMA！| # anime
                    等、.+ # applies for most of categories
                )?
            ''', r'\1', og_desc)

    # canonical URL may contain season and episode number
    mobj = re.search(r's(\d+)_p(\d+)$', canonical_url)
    if mobj:
        seri = int_or_none(mobj.group(1), default=float('inf'))
        epis = int_or_none(mobj.group(2), default=float('inf'))
        info['season_number'] = seri if seri < 100 else None
        # some anime like Detective Conan (though not available in AbemaTV)
        # has more than 1000 episodes (1026 as of 2021/11/15)
        info['episode_number'] = epis if epis < 2000 else None

    is_live, m3u8_url = False, None
    if video_type == 'now-on-air':
        is_live = True
        channel_url = 'https://api.abema.io/v1/channels'
        if video_id == 'news-global':
            channel_url = update_url_query(channel_url, {'division': '1'})
        onair_channels = self._download_json(channel_url, video_id)
        for ch in onair_channels['channels']:
            if video_id == ch['id']:
                m3u8_url = ch['playback']['hls']
                break
        else:
            # the loop finished without finding the requested channel
            raise ExtractorError(f'Cannot find on-air {video_id} channel.', expected=True)
    elif video_type == 'episode':
        api_response = self._download_json(
            f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
            note='Checking playability',
            headers=headers)
        ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'))
        if 3 not in ondemand_types:
            # cannot acquire decryption key for these streams
            self.report_warning('This is a premium-only stream')
        # API metadata overrides whatever was scraped from the page
        info.update(traverse_obj(api_response, {
            'series': ('series', 'title'),
            'season': ('season', 'name'),
            'season_number': ('season', 'sequence'),
            'episode_number': ('episode', 'number'),
        }))
        if not title:
            title = traverse_obj(api_response, ('episode', 'title'))
        if not description:
            description = traverse_obj(api_response, ('episode', 'content'))

        m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
    elif video_type == 'slots':
        api_response = self._download_json(
            f'https://api.abema.io/v1/media/slots/{video_id}', video_id,
            note='Checking playability',
            headers=headers)
        if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
            self.report_warning('This is a premium-only stream')

        m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
    else:
        raise ExtractorError('Unreachable')

    if is_live:
        self.report_warning("This is a livestream; yt-dlp doesn't support downloading natively, but FFmpeg cannot handle m3u8 manifests from AbemaTV")
        self.report_warning('Please consider using Streamlink to download these streams (https://github.com/streamlink/streamlink)')
    formats = self._extract_m3u8_formats(
        m3u8_url, video_id, ext='mp4', live=is_live)

    info.update({
        'id': video_id,
        'title': title,
        'description': description,
        'formats': formats,
        'is_live': is_live,
    })
    return info
	430
	431
	432	class AbemaTVTitleIE(AbemaTVBaseIE):
	433	_VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'
bc83b4b0	434	_PAGE_SIZE = 25
3e9b66d7 LNO	435
	436	_TESTS = [{
	437	'url': 'https://abema.tv/video/title/90-1597',
	438	'info_dict': {
	439	'id': '90-1597',
	440	'title': 'シャッフルアイランド',
	441	},
	442	'playlist_mincount': 2,
	443	}, {
	444	'url': 'https://abema.tv/video/title/193-132',
	445	'info_dict': {
	446	'id': '193-132',
	447	'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
	448	},
	449	'playlist_mincount': 16,
bc83b4b0 L	450	}, {
	451	'url': 'https://abema.tv/video/title/25-102',
	452	'info_dict': {
	453	'id': '25-102',
	454	'title': 'ソードアート・オンラインアリシゼーション',
	455	},
	456	'playlist_mincount': 24,
3e9b66d7 LNO	457	}]
3e9b66d7 LNO	458
bc83b4b0 L	459	def _fetch_page(self, playlist_id, series_version, page):
	460	programs = self._call_api(
	461	f'v1/video/series/{playlist_id}/programs', playlist_id,
	462	note=f'Downloading page {page + 1}',
	463	query={
	464	'seriesVersion': series_version,
	465	'offset': str(page * self._PAGE_SIZE),
	466	'order': 'seq',
	467	'limit': str(self._PAGE_SIZE),
	468	})
	469	yield from (
	470	self.url_result(f'https://abema.tv/video/episode/{x}')
6839ae1f	471	for x in traverse_obj(programs, ('programs', ..., 'id')))
3e9b66d7	472
bc83b4b0 L	473	def _entries(self, playlist_id, series_version):
	474	return OnDemandPagedList(
	475	functools.partial(self._fetch_page, playlist_id, series_version),
	476	self._PAGE_SIZE)
3e9b66d7	477
bc83b4b0 L	478	def _real_extract(self, url):
	479	playlist_id = self._match_id(url)
	480	series_info = self._call_api(f'v1/video/series/{playlist_id}', playlist_id)
3e9b66d7	481
bc83b4b0 L	482	return self.playlist_result(
	483	self._entries(playlist_id, series_info['version']), playlist_id=playlist_id,
	484	playlist_title=series_info.get('title'),
	485	playlist_description=series_info.get('content'))