[yt-dlp.git] / yt_dlp / extractor / abematv.py

import base64
import binascii
import functools
import hashlib
import hmac
import io
import json
import re
import struct
import time
import urllib.parse
import urllib.request
import urllib.response
import uuid
from ..utils.networking import clean_proxies
from .common import InfoExtractor
from ..aes import aes_ecb_decrypt
from ..utils import (
    ExtractorError,
    bytes_to_intlist,
    decode_base_n,
    int_or_none,
    intlist_to_bytes,
    OnDemandPagedList,
    time_seconds,
    traverse_obj,
    update_url_query,
)


def add_opener(ydl, handler):  # FIXME: Create proper API in .networking
    """Attach `handler` to the urllib opener of `ydl` and mark 'abematv-license' as supported.

    Does nothing when the 'abematv-license' scheme is already listed in the
    Urllib request handler's supported schemes.
    """
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
    license_scheme = 'abematv-license'
    urllib_rh = ydl._request_director.handlers['Urllib']
    if license_scheme in urllib_rh._SUPPORTED_URL_SCHEMES:
        # the handler was installed by an earlier call
        return

    # work on copies so that the downloader's own settings are not modified
    http_headers = ydl.params['http_headers'].copy()
    proxy_map = ydl.proxies.copy()
    clean_proxies(proxy_map, http_headers)

    director = urllib_rh._get_instance(cookiejar=ydl.cookiejar, proxies=proxy_map)
    assert isinstance(director, urllib.request.OpenerDirector)
    director.add_handler(handler)
    urllib_rh._SUPPORTED_URL_SCHEMES = (*urllib_rh._SUPPORTED_URL_SCHEMES, license_scheme)


class AbemaLicenseHandler(urllib.request.BaseHandler):
    """urllib handler answering 'abematv-license://<ticket>' requests with the decrypted video key."""

    handler_order = 499
    # alphabet passed to decode_base_n() for the 'k' field of the license response
    STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
    # hex-encoded HMAC key used to derive the AES key
    HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'

    def __init__(self, ie: 'AbemaTVIE'):
        self.ie = ie
        # the protocol that this should really handle is 'abematv-license://'
        # abematv_license_open is just a placeholder for development purposes
        # ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
        setattr(self, 'abematv-license_open', self.abematv_license_open)

    def _get_videokey_from_ticket(self, ticket):
        """Request the license for `ticket` and return the decrypted video key as bytes."""
        verbose = self.ie.get_param('verbose', False)
        media_token = self.ie._get_media_token(to_show=verbose)

        payload = json.dumps({
            'kv': 'a',
            'lt': ticket
        }).encode('utf-8')
        license_response = self.ie._download_json(
            'https://license.abema.io/abematv-hls', None,
            note='Requesting playback license' if verbose else False,
            query={'t': media_token},
            data=payload,
            headers={
                'Content-Type': 'application/json',
            })

        # 'k' decodes to one integer, which is packed as two big-endian 64-bit halves
        packed_key = decode_base_n(license_response['k'], table=self.STRTABLE)
        upper, lower = packed_key >> 64, packed_key & 0xffffffffffffffff
        encrypted_key = bytes_to_intlist(struct.pack('>QQ', upper, lower))

        # the AES key is HMAC-SHA256(HKEY, cid + device id)
        mac = hmac.new(
            binascii.unhexlify(self.HKEY),
            (license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'),
            digestmod=hashlib.sha256)
        aes_key = bytes_to_intlist(mac.digest())

        return intlist_to_bytes(aes_ecb_decrypt(encrypted_key, aes_key))

    def abematv_license_open(self, url):
        """Serve the video key for the ticket stored in the netloc part of `url`."""
        if isinstance(url, urllib.request.Request):
            url = url.get_full_url()
        ticket = urllib.parse.urlparse(url).netloc
        key = self._get_videokey_from_ticket(ticket)
        response_headers = {
            'Content-Length': str(len(key)),
        }
        return urllib.response.addinfourl(
            io.BytesIO(key), headers=response_headers, url=url, code=200)


class AbemaTVBaseIE(InfoExtractor):
    _USERTOKEN = None
    _DEVICE_ID = None
    _MEDIATOKEN = None

    _SECRETKEY = b'v+Gjs=25Aw5erR!J8ZuvRrCx*rGswhB&qdHd_SYerEWdU&a?3DzN9BRbp5KwY4hEmcj5#fykMjJ=AuWz5GSMY-d@H7DMEh3M@9n2G552Us$$k9cD=3TxwWe86!x#Zyhe'

    @classmethod
    def _generate_aks(cls, deviceid):
        """Derive the 'applicationKeySecret' string for `deviceid`.

        The value is a chain of HMAC-SHA256 rounds keyed with `_SECRETKEY`; the
        number of rounds depends on the month, day and hour (UTC) of the next
        full hour, and the device ID and that timestamp are mixed in on the way.
        Returns the final digest as unpadded URL-safe base64 text.
        """
        def digest(message):
            return hmac.new(cls._SECRETKEY, message, digestmod=hashlib.sha256).digest()

        def unpadded_b64(raw):
            return base64.urlsafe_b64encode(raw).rstrip(b'=')

        def rehash(state, rounds):
            # feed the digest back into itself `rounds` times
            for _ in range(rounds):
                state = digest(state)
            return state

        def twist(state, suffix):
            # hash the base64 form of the current state followed by `suffix`
            return digest(unpadded_b64(state) + suffix)

        device_bytes = deviceid.encode('utf-8')
        # add 1 hour and then drop minute and secs
        next_hour = int((time_seconds() // 3600 + 1) * 3600)
        utc = time.gmtime(next_hour)
        next_hour_bytes = str(next_hour).encode('utf-8')

        state = digest(cls._SECRETKEY)
        state = rehash(state, utc.tm_mon)
        state = twist(state, device_bytes)
        state = rehash(state, utc.tm_mday % 5)
        state = twist(state, next_hour_bytes)
        state = rehash(state, utc.tm_hour % 5)

        return unpadded_b64(state).decode('utf-8')

    def _get_device_token(self):
        """Return the user token, obtaining one first if none is held yet.

        Resolution order:
        1. the token already stored on the class (`_USERTOKEN`);
        2. a token cached under `_NETRC_MACHINE` for the configured username,
           accepted only if a media token can be fetched with it;
        3. a freshly registered anonymous user tied to a new random device ID.

        The token is stored on `AbemaTVBaseIE`, so it is shared by all subclasses.
        """
        if self._USERTOKEN:
            return self._USERTOKEN

        username, _ = self._get_login_info()
        AbemaTVBaseIE._USERTOKEN = username and self.cache.load(self._NETRC_MACHINE, username)
        if AbemaTVBaseIE._USERTOKEN:
            # try authentication with locally stored token
            try:
                self._get_media_token(True)
                # Return the token itself; a bare `return` here handed None to the
                # first caller, which then ended up sending 'bearer None'.
                # NOTE(review): this path neither sets _DEVICE_ID nor calls add_opener();
                # both only happen in the registration path below. Confirm whether
                # license requests can work after a login from the cache.
                return self._USERTOKEN
            except ExtractorError as e:
                self.report_warning(f'Failed to login with cached user token; obtaining a fresh one ({e})')

        # register a new user bound to a random device ID
        AbemaTVBaseIE._DEVICE_ID = str(uuid.uuid4())
        aks = self._generate_aks(self._DEVICE_ID)
        user_data = self._download_json(
            'https://api.abema.io/v1/users', None, note='Authorizing',
            data=json.dumps({
                'deviceId': self._DEVICE_ID,
                'applicationKeySecret': aks,
            }).encode('utf-8'),
            headers={
                'Content-Type': 'application/json',
            })
        AbemaTVBaseIE._USERTOKEN = user_data['token']

        # make 'abematv-license://' URLs resolvable through AbemaLicenseHandler
        add_opener(self._downloader, AbemaLicenseHandler(self))
        return self._USERTOKEN

    def _get_media_token(self, invalidate=False, to_show=True):
        """Return the media token, fetching a new one when none is held or `invalidate` is set.

        `to_show` controls whether the download note is printed. The token is
        stored on `AbemaTVBaseIE`, so it is shared by all subclasses.
        """
        if self._MEDIATOKEN and not invalidate:
            return self._MEDIATOKEN

        client_info = {
            'osName': 'android',
            'osVersion': '6.0.1',
            'osLang': 'ja_JP',
            'osTimezone': 'Asia/Tokyo',
            'appId': 'tv.abema',
            'appVersion': '3.27.1'
        }
        auth_headers = {
            'Authorization': f'bearer {self._get_device_token()}',
        }
        token_response = self._download_json(
            'https://api.abema.io/v1/media/token', None,
            note='Fetching media token' if to_show else False,
            query=client_info, headers=auth_headers)
        AbemaTVBaseIE._MEDIATOKEN = token_response['token']

        return self._MEDIATOKEN

    def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'):
        """Download JSON from `https://api.abema.io/<endpoint>` with the user token as bearer authorization."""
        api_url = f'https://api.abema.io/{endpoint}'
        auth_headers = {
            'Authorization': f'bearer {self._get_device_token()}',
        }
        return self._download_json(
            api_url, video_id, query=query or {}, note=note, headers=auth_headers)

    def _extract_breadcrumb_list(self, webpage, video_id):
        """Return the item names of the first non-empty 'BreadcrumbList' JSON-LD in `webpage`, or []."""
        jsonld_pattern = r'(?is)</span></li></ul><script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
        for match in re.finditer(jsonld_pattern, webpage):
            data = self._parse_json(match.group('json_ld'), video_id, fatal=False)
            if traverse_obj(data, '@type') == 'BreadcrumbList':
                names = traverse_obj(data, ('itemListElement', ..., 'name'))
                if names:
                    return names
        return []


class AbemaTVIE(AbemaTVBaseIE):
    _VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
    _NETRC_MACHINE = 'abematv'
    _TESTS = [{
        'url': 'https://abema.tv/video/episode/194-25_s2_p1',
        'info_dict': {
            'id': '194-25_s2_p1',
            'title': '第1話 「チーズケーキ」　「モーニング再び」',
            'series': '異世界食堂２',
            'series_number': 2,
            'episode': '第1話 「チーズケーキ」　「モーニング再び」',
            'episode_number': 1,
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/channels/anime-live2/slots/E8tvAnMJ7a9a5d',
        'info_dict': {
            'id': 'E8tvAnMJ7a9a5d',
            'title': 'ゆるキャン△ SEASON２ 全話一挙【無料ビデオ72時間】',
            'series': 'ゆるキャン△ SEASON２',
            'episode': 'ゆるキャン△ SEASON２ 全話一挙【無料ビデオ72時間】',
            'series_number': 2,
            'episode_number': 1,
            'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/video/episode/87-877_s1282_p31047',
        'info_dict': {
            'id': 'E8tvAnMJ7a9a5d',
            'title': '第5話『光射す』',
            'description': 'md5:56d4fc1b4f7769ded5f923c55bb4695d',
            'thumbnail': r're:https://hayabusa\.io/.+',
            'series': '相棒',
            'episode': '第5話『光射す』',
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/now-on-air/abema-anime',
        'info_dict': {
            'id': 'abema-anime',
            # this varies
            # 'title': '女子高生の無駄づかい 全話一挙【無料ビデオ72時間】',
            'description': 'md5:55f2e61f46a17e9230802d7bcc913d5f',
            'is_live': True,
        },
        'skip': 'Not supported until yt-dlp implements native live downloader OR AbemaTV can start a local HTTP server',
    }]
    _TIMETABLE = None

    def _perform_login(self, username, password):
        """Log in with `username`/`password` and cache the resulting user token.

        The login request is skipped when a token is cached for `username`
        and a media token is available.
        """
        self._get_device_token()
        cached_token = self.cache.load(self._NETRC_MACHINE, username)
        if cached_token and self._get_media_token():
            self.write_debug('Skipping logging in')
            return

        # don't strictly check if it's email address or not
        endpoint, id_field = (
            ('user/email', 'email') if '@' in username
            else ('oneTimePassword', 'userId'))

        credentials = json.dumps({
            id_field: username,
            'password': password
        }).encode('utf-8')
        request_headers = {
            'Authorization': f'bearer {self._get_device_token()}',
            'Origin': 'https://abema.tv',
            'Referer': 'https://abema.tv/',
            'Content-Type': 'application/json',
        }
        login_response = self._download_json(
            f'https://api.abema.io/v1/auth/{endpoint}', None, note='Logging in',
            data=credentials, headers=request_headers)

        AbemaTVBaseIE._USERTOKEN = login_response['token']
        # the media token obtained before logging in is replaced
        self._get_media_token(True)
        self.cache.store(self._NETRC_MACHINE, username, AbemaTVBaseIE._USERTOKEN)

    def _real_extract(self, url):
        """Extract metadata and HLS formats for an episode, a slot or a live channel URL.

        Metadata is gathered from the webpage (JSON-LD, title markup, breadcrumb,
        description markup/meta tags, canonical URL) and from the API; the
        playlist URL depends on the URL type ('now-on-air', 'episode', 'slots').

        Raises ExtractorError when an on-air channel cannot be found.
        """
        # starting download using infojson from this extractor is undefined behavior,
        # and never be fixed in the future; you must trigger downloads by directly specifying URL.
        # (unless there's a way to hook before downloading by extractor)
        video_id, video_type = self._match_valid_url(url).group('id', 'type')
        headers = {
            'Authorization': 'Bearer ' + self._get_device_token(),
        }
        # keep only the last path component: 'video/episode' -> 'episode', 'channels/<x>/slots' -> 'slots'
        video_type = video_type.split('/')[-1]

        webpage = self._download_webpage(url, video_id)
        canonical_url = self._search_regex(
            r'<link\s+rel="canonical"\s*href="(.+?)"', webpage, 'canonical URL',
            default=url)
        info = self._search_json_ld(webpage, video_id, default={})

        # title: page markup first, then the thumbnail JSON-LD caption
        title = self._search_regex(
            r'<span\s*class=".+?EpisodeTitleBlock__title">(.+?)</span>', webpage, 'title', default=None)
        if not title:
            jsonld = None
            for jld in re.finditer(
                    r'(?is)<span\s*class="com-m-Thumbnail__image">(?:</span>)?<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
                    webpage):
                jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
                if jsonld:
                    break
            if jsonld:
                title = jsonld.get('caption')
        if not title and video_type == 'now-on-air':
            if not self._TIMETABLE:
                # cache the timetable because it goes to 5MiB in size (!!)
                self._TIMETABLE = self._download_json(
                    'https://api.abema.io/v1/timetable/dataSet?debug=false', video_id,
                    headers=headers)
            now = time_seconds(hours=9)
            # use the title of the slot currently running on this channel
            for slot in self._TIMETABLE.get('slots', []):
                if slot.get('channelId') != video_id:
                    continue
                if slot['startAt'] <= now < slot['endAt']:
                    title = slot['title']
                    break

        # read breadcrumb on top of page
        breadcrumb = self._extract_breadcrumb_list(webpage, video_id)
        if breadcrumb:
            # breadcrumb list translates to: (e.g. 1st test for this IE)
            # Home > Anime (genre) > Isekai Shokudo 2 (series name) > Episode 1 "Cheese cakes" "Morning again" (episode title)
            # hence this works
            if len(breadcrumb) >= 2:
                # a single-item breadcrumb has no series entry; indexing [-2] would raise IndexError
                info['series'] = breadcrumb[-2]
            info['episode'] = breadcrumb[-1]
            if not title:
                title = info['episode']

        description = self._html_search_regex(
            (r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
             r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',),
            webpage, 'description', default=None, group=1)
        if not description:
            og_desc = self._html_search_meta(
                ('description', 'og:description', 'twitter:description'), webpage)
            if og_desc:
                # Strip the promotional trailer from the meta description.
                # The pattern must be anchored at the end: without `$` the lazy
                # `(.+?)` matches a single character, the optional group matches
                # nothing and the substitution leaves the text unchanged.
                description = re.sub(r'''(?sx)
                    ^(.+?)(?:
                        アニメの動画を無料で見るならABEMA！| # anime
                        等、.+ # applies for most of categories
                    )?$
                ''', r'\1', og_desc)

        # canonical URL may contain series and episode number
        mobj = re.search(r's(\d+)_p(\d+)$', canonical_url)
        if mobj:
            # fall back to infinity so that the range checks below discard the value
            seri = int_or_none(mobj.group(1), default=float('inf'))
            epis = int_or_none(mobj.group(2), default=float('inf'))
            info['series_number'] = seri if seri < 100 else None
            # some anime like Detective Conan (though not available in AbemaTV)
            # has more than 1000 episodes (1026 as of 2021/11/15)
            info['episode_number'] = epis if epis < 2000 else None

        is_live, m3u8_url = False, None
        if video_type == 'now-on-air':
            is_live = True
            channel_url = 'https://api.abema.io/v1/channels'
            if video_id == 'news-global':
                channel_url = update_url_query(channel_url, {'division': '1'})
            onair_channels = self._download_json(channel_url, video_id)
            for ch in onair_channels['channels']:
                if video_id == ch['id']:
                    m3u8_url = ch['playback']['hls']
                    break
            else:
                raise ExtractorError(f'Cannot find on-air {video_id} channel.', expected=True)
        elif video_type == 'episode':
            api_response = self._download_json(
                f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
                note='Checking playability',
                headers=headers)
            ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'))
            if 3 not in ondemand_types:
                # cannot acquire decryption key for these streams
                self.report_warning('This is a premium-only stream')
            # values from the API take precedence over those scraped from the page
            info.update(traverse_obj(api_response, {
                'series': ('series', 'title'),
                'season': ('season', 'title'),
                'season_number': ('season', 'sequence'),
                'episode_number': ('episode', 'number'),
            }))
            if not title:
                title = traverse_obj(api_response, ('episode', 'title'))
            if not description:
                description = traverse_obj(api_response, ('episode', 'content'))

            m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
        elif video_type == 'slots':
            api_response = self._download_json(
                f'https://api.abema.io/v1/media/slots/{video_id}', video_id,
                note='Checking playability',
                headers=headers)
            if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
                self.report_warning('This is a premium-only stream')

            m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
        else:
            raise ExtractorError('Unreachable')

        if is_live:
            self.report_warning("This is a livestream; yt-dlp doesn't support downloading natively, but FFmpeg cannot handle m3u8 manifests from AbemaTV")
            self.report_warning('Please consider using Streamlink to download these streams (https://github.com/streamlink/streamlink)')
        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, ext='mp4', live=is_live)

        info.update({
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'is_live': is_live,
        })
        return info


class AbemaTVTitleIE(AbemaTVBaseIE):
    _VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'
    _PAGE_SIZE = 25

    _TESTS = [{
        'url': 'https://abema.tv/video/title/90-1597',
        'info_dict': {
            'id': '90-1597',
            'title': 'シャッフルアイランド',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://abema.tv/video/title/193-132',
        'info_dict': {
            'id': '193-132',
            'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
        },
        'playlist_mincount': 16,
    }, {
        'url': 'https://abema.tv/video/title/25-102',
        'info_dict': {
            'id': '25-102',
            'title': 'ソードアート・オンライン アリシゼーション',
        },
        'playlist_mincount': 24,
    }]

    def _fetch_page(self, playlist_id, series_version, page):
        """Yield a URL result for every program on the zero-based `page` of the series."""
        page_query = {
            'seriesVersion': series_version,
            'offset': str(page * self._PAGE_SIZE),
            'order': 'seq',
            'limit': str(self._PAGE_SIZE),
        }
        response = self._call_api(
            f'v1/video/series/{playlist_id}/programs', playlist_id,
            note=f'Downloading page {page + 1}', query=page_query)
        for program_id in traverse_obj(response, ('programs', ..., 'id')):
            yield self.url_result(f'https://abema.tv/video/episode/{program_id}')

    def _entries(self, playlist_id, series_version):
        """Return an OnDemandPagedList that fetches the series' programs `_PAGE_SIZE` at a time."""
        fetch_page = functools.partial(self._fetch_page, playlist_id, series_version)
        return OnDemandPagedList(fetch_page, self._PAGE_SIZE)

    def _real_extract(self, url):
        """Return a playlist of the episodes belonging to the series in `url`."""
        playlist_id = self._match_id(url)
        series = self._call_api(f'v1/video/series/{playlist_id}', playlist_id)
        entries = self._entries(playlist_id, series['version'])

        return self.playlist_result(
            entries, playlist_id=playlist_id,
            playlist_title=series.get('title'),
            playlist_description=series.get('content'))
Commit	Line	Data
f8271158	1	import base64
f8271158	2	import binascii
bc83b4b0	3	import functools
3e9b66d7 LNO	4	import hashlib
3e9b66d7 LNO	5	import hmac
f8271158	6	import io
f8271158	7	import json
3e9b66d7 LNO	8	import re
3e9b66d7 LNO	9	import struct
f8271158	10	import time
14f25df2	11	import urllib.parse
ac668111	12	import urllib.request
f9934b96	13	import urllib.response
f9934b96	14	import uuid
497bbbbd	15	from ..utils.networking import clean_proxies
3e9b66d7 LNO	16	from .common import InfoExtractor
3e9b66d7 LNO	17	from ..aes import aes_ecb_decrypt
3e9b66d7 LNO	18	from ..utils import (
3e9b66d7 LNO	19	ExtractorError,
f8271158	20	bytes_to_intlist,
7b2c3f47	21	decode_base_n,
3e9b66d7	22	int_or_none,
f8271158	23	intlist_to_bytes,
bc83b4b0	24	OnDemandPagedList,
3e9b66d7	25	time_seconds,
3e9b66d7	26	traverse_obj,
f8271158	27	update_url_query,
3e9b66d7 LNO	28	)
3e9b66d7 LNO	29
3e9b66d7	30
9f662472	31	def add_opener(ydl, handler): # FIXME: Create proper API in .networking
9f662472	32	"""Add a handler for opening URLs, like _download_webpage"""
3e9b66d7 LNO	33	# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
3e9b66d7 LNO	34	# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
9f662472	35	rh = ydl._request_director.handlers['Urllib']
	36	if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
	37	return
497bbbbd S	38	headers = ydl.params['http_headers'].copy()
	39	proxies = ydl.proxies.copy()
	40	clean_proxies(proxies, headers)
	41	opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=proxies)
9f662472	42	assert isinstance(opener, urllib.request.OpenerDirector)
	43	opener.add_handler(handler)
	44	rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')
3e9b66d7 LNO	45
3e9b66d7 LNO	46
ac668111	47	class AbemaLicenseHandler(urllib.request.BaseHandler):
3e9b66d7 LNO	48	handler_order = 499
	49	STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
	50	HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
	51
	52	def __init__(self, ie: 'AbemaTVIE'):
962ffcf8	53	# the protocol that this should really handle is 'abematv-license://'
3e9b66d7 LNO	54	# abematv_license_open is just a placeholder for development purposes
	55	# ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
	56	setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open'))
	57	self.ie = ie
	58
	59	def _get_videokey_from_ticket(self, ticket):
9809740b	60	to_show = self.ie.get_param('verbose', False)
3e9b66d7 LNO	61	media_token = self.ie._get_media_token(to_show=to_show)
	62
	63	license_response = self.ie._download_json(
	64	'https://license.abema.io/abematv-hls', None, note='Requesting playback license' if to_show else False,
	65	query={'t': media_token},
	66	data=json.dumps({
	67	'kv': 'a',
	68	'lt': ticket
	69	}).encode('utf-8'),
	70	headers={
	71	'Content-Type': 'application/json',
	72	})
	73
7b2c3f47	74	res = decode_base_n(license_response['k'], table=self.STRTABLE)
3e9b66d7 LNO	75	encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
	76
	77	h = hmac.new(
f8271158	78	binascii.unhexlify(self.HKEY),
3e9b66d7 LNO	79	(license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'),
	80	digestmod=hashlib.sha256)
	81	enckey = bytes_to_intlist(h.digest())
	82
	83	return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
	84
	85	def abematv_license_open(self, url):
3d2623a8	86	url = url.get_full_url() if isinstance(url, urllib.request.Request) else url
14f25df2	87	ticket = urllib.parse.urlparse(url).netloc
3e9b66d7	88	response_data = self._get_videokey_from_ticket(ticket)
f9934b96	89	return urllib.response.addinfourl(io.BytesIO(response_data), headers={
9f662472	90	'Content-Length': str(len(response_data)),
3e9b66d7 LNO	91	}, url=url, code=200)
	92
	93
	94	class AbemaTVBaseIE(InfoExtractor):
3e9b66d7 LNO	95	_USERTOKEN = None
3e9b66d7 LNO	96	_DEVICE_ID = None
3e9b66d7 LNO	97	_MEDIATOKEN = None
	98
	99	_SECRETKEY = b'v+Gjs=25Aw5erR!J8ZuvRrCx*rGswhB&qdHd_SYerEWdU&a?3DzN9BRbp5KwY4hEmcj5#fykMjJ=AuWz5GSMY-d@H7DMEh3M@9n2G552Us$$k9cD=3TxwWe86!x#Zyhe'
	100
bc83b4b0 L	101	@classmethod
bc83b4b0 L	102	def _generate_aks(cls, deviceid):
3e9b66d7 LNO	103	deviceid = deviceid.encode('utf-8')
3e9b66d7 LNO	104	# add 1 hour and then drop minute and secs
a4f16832	105	ts_1hour = int((time_seconds() // 3600 + 1) * 3600)
3e9b66d7 LNO	106	time_struct = time.gmtime(ts_1hour)
	107	ts_1hour_str = str(ts_1hour).encode('utf-8')
	108
	109	tmp = None
	110
	111	def mix_once(nonce):
	112	nonlocal tmp
bc83b4b0	113	h = hmac.new(cls._SECRETKEY, digestmod=hashlib.sha256)
3e9b66d7 LNO	114	h.update(nonce)
	115	tmp = h.digest()
	116
	117	def mix_tmp(count):
	118	nonlocal tmp
	119	for i in range(count):
	120	mix_once(tmp)
	121
	122	def mix_twist(nonce):
	123	nonlocal tmp
f8271158	124	mix_once(base64.urlsafe_b64encode(tmp).rstrip(b'=') + nonce)
3e9b66d7	125
bc83b4b0	126	mix_once(cls._SECRETKEY)
3e9b66d7 LNO	127	mix_tmp(time_struct.tm_mon)
	128	mix_twist(deviceid)
	129	mix_tmp(time_struct.tm_mday % 5)
	130	mix_twist(ts_1hour_str)
	131	mix_tmp(time_struct.tm_hour % 5)
	132
f8271158	133	return base64.urlsafe_b64encode(tmp).rstrip(b'=').decode('utf-8')
3e9b66d7 LNO	134
	135	def _get_device_token(self):
	136	if self._USERTOKEN:
	137	return self._USERTOKEN
	138
a4f16832 L	139	username, _ = self._get_login_info()
	140	AbemaTVBaseIE._USERTOKEN = username and self.cache.load(self._NETRC_MACHINE, username)
	141	if AbemaTVBaseIE._USERTOKEN:
	142	# try authentication with locally stored token
	143	try:
	144	self._get_media_token(True)
	145	return
	146	except ExtractorError as e:
	147	self.report_warning(f'Failed to login with cached user token; obtaining a fresh one ({e})')
	148
bc83b4b0	149	AbemaTVBaseIE._DEVICE_ID = str(uuid.uuid4())
3e9b66d7 LNO	150	aks = self._generate_aks(self._DEVICE_ID)
	151	user_data = self._download_json(
	152	'https://api.abema.io/v1/users', None, note='Authorizing',
	153	data=json.dumps({
	154	'deviceId': self._DEVICE_ID,
	155	'applicationKeySecret': aks,
	156	}).encode('utf-8'),
	157	headers={
	158	'Content-Type': 'application/json',
	159	})
bc83b4b0	160	AbemaTVBaseIE._USERTOKEN = user_data['token']
3e9b66d7	161
3e9b66d7	162	add_opener(self._downloader, AbemaLicenseHandler(self))
3e9b66d7 LNO	163	return self._USERTOKEN
	164
	165	def _get_media_token(self, invalidate=False, to_show=True):
	166	if not invalidate and self._MEDIATOKEN:
	167	return self._MEDIATOKEN
	168
bc83b4b0	169	AbemaTVBaseIE._MEDIATOKEN = self._download_json(
3e9b66d7 LNO	170	'https://api.abema.io/v1/media/token', None, note='Fetching media token' if to_show else False,
	171	query={
	172	'osName': 'android',
	173	'osVersion': '6.0.1',
	174	'osLang': 'ja_JP',
	175	'osTimezone': 'Asia/Tokyo',
	176	'appId': 'tv.abema',
	177	'appVersion': '3.27.1'
	178	}, headers={
bc83b4b0	179	'Authorization': f'bearer {self._get_device_token()}',
3e9b66d7 LNO	180	})['token']
	181
	182	return self._MEDIATOKEN
	183
bc83b4b0 L	184	def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'):
	185	return self._download_json(
	186	f'https://api.abema.io/{endpoint}', video_id, query=query or {},
	187	note=note,
	188	headers={
	189	'Authorization': f'bearer {self._get_device_token()}',
	190	})
	191
	192	def _extract_breadcrumb_list(self, webpage, video_id):
	193	for jld in re.finditer(
	194	r'(?is)</span></li></ul><script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
	195	webpage):
	196	jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
	197	if traverse_obj(jsonld, '@type') != 'BreadcrumbList':
	198	continue
	199	items = traverse_obj(jsonld, ('itemListElement', ..., 'name'))
	200	if items:
	201	return items
	202	return []
	203
	204
	205	class AbemaTVIE(AbemaTVBaseIE):
	206	_VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air\|video/episode\|channels/.+?/slots)/(?P<id>[^?/]+)'
	207	_NETRC_MACHINE = 'abematv'
	208	_TESTS = [{
	209	'url': 'https://abema.tv/video/episode/194-25_s2_p1',
	210	'info_dict': {
	211	'id': '194-25_s2_p1',
	212	'title': '第1話「チーズケーキ」　「モーニング再び」',
	213	'series': '異世界食堂２',
	214	'series_number': 2,
	215	'episode': '第1話「チーズケーキ」　「モーニング再び」',
	216	'episode_number': 1,
	217	},
	218	'skip': 'expired',
	219	}, {
	220	'url': 'https://abema.tv/channels/anime-live2/slots/E8tvAnMJ7a9a5d',
	221	'info_dict': {
	222	'id': 'E8tvAnMJ7a9a5d',
	223	'title': 'ゆるキャン△ SEASON２全話一挙【無料ビデオ72時間】',
	224	'series': 'ゆるキャン△ SEASON２',
	225	'episode': 'ゆるキャン△ SEASON２全話一挙【無料ビデオ72時間】',
	226	'series_number': 2,
	227	'episode_number': 1,
	228	'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
	229	},
	230	'skip': 'expired',
	231	}, {
	232	'url': 'https://abema.tv/video/episode/87-877_s1282_p31047',
	233	'info_dict': {
	234	'id': 'E8tvAnMJ7a9a5d',
	235	'title': '第5話『光射す』',
	236	'description': 'md5:56d4fc1b4f7769ded5f923c55bb4695d',
	237	'thumbnail': r're:https://hayabusa\.io/.+',
	238	'series': '相棒',
	239	'episode': '第5話『光射す』',
	240	},
	241	'skip': 'expired',
	242	}, {
	243	'url': 'https://abema.tv/now-on-air/abema-anime',
	244	'info_dict': {
	245	'id': 'abema-anime',
	246	# this varies
	247	# 'title': '女子高生の無駄づかい全話一挙【無料ビデオ72時間】',
248	'description': 'md5:55f2e61f46a17e9230802d7bcc913d5f',
249	'is_live': True,
250	},
251	'skip': 'Not supported until yt-dlp implements native live downloader OR AbemaTV can start a local HTTP server',
252	}]
253	_TIMETABLE = None
254
52efa4b3	255	def _perform_login(self, username, password):
a4f16832 L	256	self._get_device_token()
	257	if self.cache.load(self._NETRC_MACHINE, username) and self._get_media_token():
	258	self.write_debug('Skipping logging in')
	259	return
	260
3e9b66d7 LNO	261	if '@' in username: # don't strictly check if it's email address or not
	262	ep, method = 'user/email', 'email'
	263	else:
	264	ep, method = 'oneTimePassword', 'userId'
	265
	266	login_response = self._download_json(
	267	f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
	268	data=json.dumps({
	269	method: username,
	270	'password': password
	271	}).encode('utf-8'), headers={
bc83b4b0	272	'Authorization': f'bearer {self._get_device_token()}',
3e9b66d7 LNO	273	'Origin': 'https://abema.tv',
	274	'Referer': 'https://abema.tv/',
	275	'Content-Type': 'application/json',
	276	})
	277
bc83b4b0	278	AbemaTVBaseIE._USERTOKEN = login_response['token']
3e9b66d7	279	self._get_media_token(True)
a4f16832	280	self.cache.store(self._NETRC_MACHINE, username, AbemaTVBaseIE._USERTOKEN)
3e9b66d7 LNO	281
	282	def _real_extract(self, url):
	283	# starting download using infojson from this extractor is undefined behavior,
962ffcf8	284	# and never be fixed in the future; you must trigger downloads by directly specifying URL.
3e9b66d7 LNO	285	# (unless there's a way to hook before downloading by extractor)
	286	video_id, video_type = self._match_valid_url(url).group('id', 'type')
	287	headers = {
	288	'Authorization': 'Bearer ' + self._get_device_token(),
	289	}
	290	video_type = video_type.split('/')[-1]
	291
	292	webpage = self._download_webpage(url, video_id)
	293	canonical_url = self._search_regex(
	294	r'<link\s+rel="canonical"\s*href="(.+?)"', webpage, 'canonical URL',
	295	default=url)
	296	info = self._search_json_ld(webpage, video_id, default={})
	297
	298	title = self._search_regex(
	299	r'<span\s*class=".+?EpisodeTitleBlock__title">(.+?)</span>', webpage, 'title', default=None)
	300	if not title:
	301	jsonld = None
	302	for jld in re.finditer(
	303	r'(?is)<span\sclass="com-m-Thumbnail__image">(?:</span>)?<script[^>]+type=(["\']?)application/ld\+json\1[^>]>(?P<json_ld>.+?)</script>',
	304	webpage):
	305	jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
	306	if jsonld:
	307	break
	308	if jsonld:
	309	title = jsonld.get('caption')
	310	if not title and video_type == 'now-on-air':
	311	if not self._TIMETABLE:
	312	# cache the timetable because it goes to 5MiB in size (!!)
	313	self._TIMETABLE = self._download_json(
	314	'https://api.abema.io/v1/timetable/dataSet?debug=false', video_id,
	315	headers=headers)
	316	now = time_seconds(hours=9)
	317	for slot in self._TIMETABLE.get('slots', []):
	318	if slot.get('channelId') != video_id:
	319	continue
	320	if slot['startAt'] <= now and now < slot['endAt']:
	321	title = slot['title']
	322	break
	323
	324	# read breadcrumb on top of page
	325	breadcrumb = self._extract_breadcrumb_list(webpage, video_id)
	326	if breadcrumb:
62b58c09	327	# breadcrumb list translates to: (e.g. 1st test for this IE)
3e9b66d7 LNO	328	# Home > Anime (genre) > Isekai Shokudo 2 (series name) > Episode 1 "Cheese cakes" "Morning again" (episode title)
	329	# hence this works
	330	info['series'] = breadcrumb[-2]
	331	info['episode'] = breadcrumb[-1]
	332	if not title:
	333	title = info['episode']
	334
	335	description = self._html_search_regex(
	336	(r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
	337	r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',),
	338	webpage, 'description', default=None, group=1)
	339	if not description:
	340	og_desc = self._html_search_meta(
	341	('description', 'og:description', 'twitter:description'), webpage)
	342	if og_desc:
	343	description = re.sub(r'''(?sx)
	344	^(.+?)(?:
	345	アニメの動画を無料で見るならABEMA！\| # anime
	346	等、.+ # applies for most of categories
	347	)?
	348	''', r'\1', og_desc)
	349
	350	# canonical URL may contain series and episode number
	351	mobj = re.search(r's(\d+)_p(\d+)$', canonical_url)
	352	if mobj:
	353	seri = int_or_none(mobj.group(1), default=float('inf'))
	354	epis = int_or_none(mobj.group(2), default=float('inf'))
	355	info['series_number'] = seri if seri < 100 else None
	356	# some anime like Detective Conan (though not available in AbemaTV)
	357	# has more than 1000 episodes (1026 as of 2021/11/15)
	358	info['episode_number'] = epis if epis < 2000 else None
	359
	360	is_live, m3u8_url = False, None
	361	if video_type == 'now-on-air':
	362	is_live = True
	363	channel_url = 'https://api.abema.io/v1/channels'
	364	if video_id == 'news-global':
	365	channel_url = update_url_query(channel_url, {'division': '1'})
	366	onair_channels = self._download_json(channel_url, video_id)
	367	for ch in onair_channels['channels']:
	368	if video_id == ch['id']:
	369	m3u8_url = ch['playback']['hls']
	370	break
	371	else:
	372	raise ExtractorError(f'Cannot find on-air {video_id} channel.', expected=True)
	373	elif video_type == 'episode':
	374	api_response = self._download_json(
	375	f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
	376	note='Checking playability',
	377	headers=headers)
6839ae1f	378	ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'))
3e9b66d7 LNO	379	if 3 not in ondemand_types:
	380	# cannot acquire decryption key for these streams
	381	self.report_warning('This is a premium-only stream')
c449c065 L	382	info.update(traverse_obj(api_response, {
	383	'series': ('series', 'title'),
	384	'season': ('season', 'title'),
	385	'season_number': ('season', 'sequence'),
	386	'episode_number': ('episode', 'number'),
	387	}))
	388	if not title:
	389	title = traverse_obj(api_response, ('episode', 'title'))
	390	if not description:
	391	description = traverse_obj(api_response, ('episode', 'content'))
3e9b66d7 LNO	392
	393	m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
	394	elif video_type == 'slots':
	395	api_response = self._download_json(
	396	f'https://api.abema.io/v1/media/slots/{video_id}', video_id,
	397	note='Checking playability',
	398	headers=headers)
	399	if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
	400	self.report_warning('This is a premium-only stream')
	401
	402	m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
	403	else:
	404	raise ExtractorError('Unreachable')
	405
	406	if is_live:
	407	self.report_warning("This is a livestream; yt-dlp doesn't support downloading natively, but FFmpeg cannot handle m3u8 manifests from AbemaTV")
	408	self.report_warning('Please consider using Streamlink to download these streams (https://github.com/streamlink/streamlink)')
	409	formats = self._extract_m3u8_formats(
	410	m3u8_url, video_id, ext='mp4', live=is_live)
	411
	412	info.update({
	413	'id': video_id,
	414	'title': title,
	415	'description': description,
	416	'formats': formats,
	417	'is_live': is_live,
	418	})
	419	return info
	420
	421
	422	class AbemaTVTitleIE(AbemaTVBaseIE):
	423	_VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'
bc83b4b0	424	_PAGE_SIZE = 25
3e9b66d7 LNO	425
	426	_TESTS = [{
	427	'url': 'https://abema.tv/video/title/90-1597',
	428	'info_dict': {
	429	'id': '90-1597',
	430	'title': 'シャッフルアイランド',
	431	},
	432	'playlist_mincount': 2,
	433	}, {
	434	'url': 'https://abema.tv/video/title/193-132',
	435	'info_dict': {
	436	'id': '193-132',
	437	'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
	438	},
	439	'playlist_mincount': 16,
bc83b4b0 L	440	}, {
	441	'url': 'https://abema.tv/video/title/25-102',
	442	'info_dict': {
	443	'id': '25-102',
	444	'title': 'ソードアート・オンラインアリシゼーション',
	445	},
	446	'playlist_mincount': 24,
3e9b66d7 LNO	447	}]
3e9b66d7 LNO	448
bc83b4b0 L	449	def _fetch_page(self, playlist_id, series_version, page):
	450	programs = self._call_api(
	451	f'v1/video/series/{playlist_id}/programs', playlist_id,
	452	note=f'Downloading page {page + 1}',
	453	query={
	454	'seriesVersion': series_version,
	455	'offset': str(page * self._PAGE_SIZE),
	456	'order': 'seq',
	457	'limit': str(self._PAGE_SIZE),
	458	})
	459	yield from (
	460	self.url_result(f'https://abema.tv/video/episode/{x}')
6839ae1f	461	for x in traverse_obj(programs, ('programs', ..., 'id')))
3e9b66d7	462
bc83b4b0 L	463	def _entries(self, playlist_id, series_version):
	464	return OnDemandPagedList(
	465	functools.partial(self._fetch_page, playlist_id, series_version),
	466	self._PAGE_SIZE)
3e9b66d7	467
bc83b4b0 L	468	def _real_extract(self, url):
	469	playlist_id = self._match_id(url)
	470	series_info = self._call_api(f'v1/video/series/{playlist_id}', playlist_id)
3e9b66d7	471
bc83b4b0 L	472	return self.playlist_result(
	473	self._entries(playlist_id, series_info['version']), playlist_id=playlist_id,
	474	playlist_title=series_info.get('title'),
	475	playlist_description=series_info.get('content'))