[yt-dlp.git] / yt_dlp / extractor / abematv.py

import base64
import binascii
import functools
import hashlib
import hmac
import io
import json
import re
import struct
import time
import urllib.parse
import urllib.request
import urllib.response
import uuid

from .common import InfoExtractor
from ..aes import aes_ecb_decrypt
from ..utils import (
    ExtractorError,
    bytes_to_intlist,
    decode_base_n,
    int_or_none,
    intlist_to_bytes,
    OnDemandPagedList,
    time_seconds,
    traverse_obj,
    update_url_query,
)


def add_opener(ydl, handler):  # FIXME: Create proper API in .networking
    """Attach ``handler`` to the opener behind the 'Urllib' request handler.

    The 'abematv-license' scheme is appended to the request handler's
    supported schemes afterwards. If that scheme is already listed, the
    function does nothing, so repeated calls install only one handler.
    """
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
    scheme = 'abematv-license'
    urllib_rh = ydl._request_director.handlers['Urllib']
    known_schemes = urllib_rh._SUPPORTED_URL_SCHEMES
    if scheme not in known_schemes:
        director = urllib_rh._get_instance(cookiejar=ydl.cookiejar, proxies=ydl.proxies)
        assert isinstance(director, urllib.request.OpenerDirector)
        director.add_handler(handler)
        urllib_rh._SUPPORTED_URL_SCHEMES = tuple(known_schemes) + (scheme,)


class AbemaLicenseHandler(urllib.request.BaseHandler):
    """urllib handler answering ``abematv-license://<ticket>`` with the decrypted video key."""

    handler_order = 499
    STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
    HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'

    def __init__(self, ie: 'AbemaTVIE'):
        self.ie = ie
        # the protocol that this should really handle is 'abematv-license://'
        # a method name cannot contain '-', so abematv_license_open is the real
        # implementation and is aliased here under the hyphenated name
        # ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
        setattr(self, 'abematv-license_open', self.abematv_license_open)

    def _get_videokey_from_ticket(self, ticket):
        """Exchange a license ``ticket`` for the decrypted video key (bytes)."""
        verbose = self.ie.get_param('verbose', False)
        token = self.ie._get_media_token(to_show=verbose)

        payload = json.dumps({
            'kv': 'a',
            'lt': ticket
        }).encode('utf-8')
        license_info = self.ie._download_json(
            'https://license.abema.io/abematv-hls', None,
            note='Requesting playback license' if verbose else False,
            query={'t': token}, data=payload,
            headers={'Content-Type': 'application/json'})

        # 'k' is a number written with the STRTABLE alphabet; repack it as
        # two big-endian 64-bit words to get the 16 encrypted key bytes
        key_number = decode_base_n(license_info['k'], table=self.STRTABLE)
        high_word = key_number >> 64
        low_word = key_number & 0xffffffffffffffff
        encrypted_key = bytes_to_intlist(struct.pack('>QQ', high_word, low_word))

        # the AES key is HMAC-SHA256(HKEY, cid + device id)
        mac = hmac.new(
            binascii.unhexlify(self.HKEY),
            (license_info['cid'] + self.ie._DEVICE_ID).encode('utf-8'),
            digestmod=hashlib.sha256)
        aes_key = bytes_to_intlist(mac.digest())

        return intlist_to_bytes(aes_ecb_decrypt(encrypted_key, aes_key))

    def abematv_license_open(self, url):
        """Serve the video key for ``url`` (a URL string or ``urllib.request.Request``).

        The ticket is taken from the network-location part of the URL.
        """
        if isinstance(url, urllib.request.Request):
            url = url.get_full_url()
        video_key = self._get_videokey_from_ticket(urllib.parse.urlparse(url).netloc)
        response_headers = {
            'Content-Length': str(len(video_key)),
        }
        return urllib.response.addinfourl(
            io.BytesIO(video_key), headers=response_headers, url=url, code=200)


class AbemaTVBaseIE(InfoExtractor):
    """Authentication and API helpers shared by the AbemaTV extractors.

    The token attributes below are always assigned on ``AbemaTVBaseIE``
    itself, so every subclass and instance sees the same values.
    """
    _USERTOKEN = None
    _DEVICE_ID = None
    _MEDIATOKEN = None

    _SECRETKEY = b'v+Gjs=25Aw5erR!J8ZuvRrCx*rGswhB&qdHd_SYerEWdU&a?3DzN9BRbp5KwY4hEmcj5#fykMjJ=AuWz5GSMY-d@H7DMEh3M@9n2G552Us$$k9cD=3TxwWe86!x#Zyhe'

    @classmethod
    def _generate_aks(cls, deviceid):
        """Compute the ``applicationKeySecret`` string for ``deviceid``.

        The value is a chain of HMAC-SHA256 digests keyed with ``_SECRETKEY``.
        The device ID and the timestamp of the next full hour are mixed in,
        and the number of extra rounds depends on that timestamp's UTC month,
        day of month (mod 5) and hour (mod 5). The result is the final digest
        as unpadded URL-safe base64.
        """
        deviceid = deviceid.encode('utf-8')
        # add 1 hour and then drop minute and secs
        ts_1hour = int((time_seconds() // 3600 + 1) * 3600)
        time_struct = time.gmtime(ts_1hour)
        ts_1hour_str = str(ts_1hour).encode('utf-8')

        def mix_once(message):
            # one HMAC-SHA256 round over ``message``
            return hmac.new(cls._SECRETKEY, message, digestmod=hashlib.sha256).digest()

        def mix_repeat(digest, count):
            # feed the digest back into itself ``count`` times
            for _ in range(count):
                digest = mix_once(digest)
            return digest

        def mix_twist(digest, nonce):
            # fold ``nonce`` in, appended to the base64 form of the digest
            return mix_once(base64.urlsafe_b64encode(digest).rstrip(b'=') + nonce)

        digest = mix_once(cls._SECRETKEY)
        digest = mix_repeat(digest, time_struct.tm_mon)
        digest = mix_twist(digest, deviceid)
        digest = mix_repeat(digest, time_struct.tm_mday % 5)
        digest = mix_twist(digest, ts_1hour_str)
        digest = mix_repeat(digest, time_struct.tm_hour % 5)

        return base64.urlsafe_b64encode(digest).rstrip(b'=').decode('utf-8')

    def _get_device_token(self):
        """Return the user token, obtaining one on first use.

        A token cached for the configured username is tried first. If it
        cannot fetch a media token, a new anonymous device is registered and
        its token is used instead. The token is stored in
        ``AbemaTVBaseIE._USERTOKEN`` and returned on every path.
        """
        if self._USERTOKEN:
            return self._USERTOKEN

        # Register the license handler before either path can return, so it is
        # installed with a cached token too (add_opener is a no-op when the
        # scheme is already registered).
        add_opener(self._downloader, AbemaLicenseHandler(self))

        username, _ = self._get_login_info()
        AbemaTVBaseIE._USERTOKEN = username and self.cache.load(self._NETRC_MACHINE, username)
        if AbemaTVBaseIE._USERTOKEN:
            # try authentication with locally stored token
            # NOTE(review): _DEVICE_ID is not set on this path, while
            # AbemaLicenseHandler reads it when deriving the key - confirm
            # whether the device ID should be cached alongside the token.
            try:
                self._get_media_token(True)
            except ExtractorError as e:
                self.report_warning(f'Failed to login with cached user token; obtaining a fresh one ({e})')
            else:
                # return the token itself; callers interpolate the result
                # into the Authorization header
                return self._USERTOKEN

        AbemaTVBaseIE._DEVICE_ID = str(uuid.uuid4())
        aks = self._generate_aks(self._DEVICE_ID)
        user_data = self._download_json(
            'https://api.abema.io/v1/users', None, note='Authorizing',
            data=json.dumps({
                'deviceId': self._DEVICE_ID,
                'applicationKeySecret': aks,
            }).encode('utf-8'),
            headers={
                'Content-Type': 'application/json',
            })
        AbemaTVBaseIE._USERTOKEN = user_data['token']

        return self._USERTOKEN

    def _get_media_token(self, invalidate=False, to_show=True):
        """Return the media token, fetching it when missing or when ``invalidate`` is true.

        ``to_show`` controls whether the download note is printed.
        """
        if not invalidate and self._MEDIATOKEN:
            return self._MEDIATOKEN

        AbemaTVBaseIE._MEDIATOKEN = self._download_json(
            'https://api.abema.io/v1/media/token', None, note='Fetching media token' if to_show else False,
            query={
                'osName': 'android',
                'osVersion': '6.0.1',
                'osLang': 'ja_JP',
                'osTimezone': 'Asia/Tokyo',
                'appId': 'tv.abema',
                'appVersion': '3.27.1'
            }, headers={
                'Authorization': f'bearer {self._get_device_token()}',
            })['token']

        return self._MEDIATOKEN

    def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'):
        """Download JSON from ``https://api.abema.io/<endpoint>`` with the user token attached."""
        return self._download_json(
            f'https://api.abema.io/{endpoint}', video_id, query=query or {},
            note=note,
            headers={
                'Authorization': f'bearer {self._get_device_token()}',
            })

    def _extract_breadcrumb_list(self, webpage, video_id):
        """Return the item names of the first non-empty ``BreadcrumbList`` JSON-LD block.

        Returns an empty list when the page has no such block.
        """
        for jld in re.finditer(
                r'(?is)</span></li></ul><script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
                webpage):
            jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
            if traverse_obj(jsonld, '@type') != 'BreadcrumbList':
                continue
            items = traverse_obj(jsonld, ('itemListElement', ..., 'name'))
            if items:
                return items
        return []


class AbemaTVIE(AbemaTVBaseIE):
    """Extractor for single AbemaTV videos: on-air channels, episodes and time-shifted slots."""
    _VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
    _NETRC_MACHINE = 'abematv'
    _TESTS = [{
        'url': 'https://abema.tv/video/episode/194-25_s2_p1',
        'info_dict': {
            'id': '194-25_s2_p1',
            'title': '第1話 「チーズケーキ」　「モーニング再び」',
            'series': '異世界食堂２',
            'series_number': 2,
            'episode': '第1話 「チーズケーキ」　「モーニング再び」',
            'episode_number': 1,
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/channels/anime-live2/slots/E8tvAnMJ7a9a5d',
        'info_dict': {
            'id': 'E8tvAnMJ7a9a5d',
            'title': 'ゆるキャン△ SEASON２ 全話一挙【無料ビデオ72時間】',
            'series': 'ゆるキャン△ SEASON２',
            'episode': 'ゆるキャン△ SEASON２ 全話一挙【無料ビデオ72時間】',
            'series_number': 2,
            'episode_number': 1,
            'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/video/episode/87-877_s1282_p31047',
        'info_dict': {
            # the extractor always reports the ID taken from the URL
            'id': '87-877_s1282_p31047',
            'title': '第5話『光射す』',
            'description': 'md5:56d4fc1b4f7769ded5f923c55bb4695d',
            'thumbnail': r're:https://hayabusa\.io/.+',
            'series': '相棒',
            'episode': '第5話『光射す』',
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/now-on-air/abema-anime',
        'info_dict': {
            'id': 'abema-anime',
            # this varies
            # 'title': '女子高生の無駄づかい 全話一挙【無料ビデオ72時間】',
            'description': 'md5:55f2e61f46a17e9230802d7bcc913d5f',
            'is_live': True,
        },
        'skip': 'Not supported until yt-dlp implements native live downloader OR AbemaTV can start a local HTTP server',
    }]
    # timetable downloaded for now-on-air titles; filled in lazily by _real_extract
    _TIMETABLE = None

    def _perform_login(self, username, password):
        """Log in with ``username``/``password`` and cache the resulting user token.

        The login request is skipped when a token is cached for ``username``
        and a media token is available. A username containing '@' is sent as
        an email address, anything else as a user ID.
        """
        self._get_device_token()
        if self.cache.load(self._NETRC_MACHINE, username) and self._get_media_token():
            self.write_debug('Skipping logging in')
            return

        if '@' in username:  # don't strictly check if it's email address or not
            ep, method = 'user/email', 'email'
        else:
            ep, method = 'oneTimePassword', 'userId'

        login_response = self._download_json(
            f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
            data=json.dumps({
                method: username,
                'password': password
            }).encode('utf-8'), headers={
                'Authorization': f'bearer {self._get_device_token()}',
                'Origin': 'https://abema.tv',
                'Referer': 'https://abema.tv/',
                'Content-Type': 'application/json',
            })

        AbemaTVBaseIE._USERTOKEN = login_response['token']
        # the media token belongs to the previous user token; refresh it
        self._get_media_token(True)
        self.cache.store(self._NETRC_MACHINE, username, AbemaTVBaseIE._USERTOKEN)

    def _real_extract(self, url):
        """Build the info dict for an on-air channel, an episode or a slot URL.

        Metadata is taken from the webpage (JSON-LD, breadcrumb list, HTML
        snippets, canonical URL) and from the API. The HLS manifest URL
        depends on the URL type. Raises ``ExtractorError`` when an on-air
        channel cannot be found in the channel list.
        """
        # starting download using infojson from this extractor is undefined behavior,
        # and never be fixed in the future; you must trigger downloads by directly specifying URL.
        # (unless there's a way to hook before downloading by extractor)
        video_id, video_type = self._match_valid_url(url).group('id', 'type')
        headers = {
            'Authorization': 'Bearer ' + self._get_device_token(),
        }
        # reduce 'video/episode' and 'channels/<name>/slots' to their last path component
        video_type = video_type.split('/')[-1]

        webpage = self._download_webpage(url, video_id)
        canonical_url = self._search_regex(
            r'<link\s+rel="canonical"\s*href="(.+?)"', webpage, 'canonical URL',
            default=url)
        info = self._search_json_ld(webpage, video_id, default={})

        # title sources, in order: title block in the page, thumbnail JSON-LD caption,
        # current timetable slot (live only), breadcrumb, API response (episodes only)
        title = self._search_regex(
            r'<span\s*class=".+?EpisodeTitleBlock__title">(.+?)</span>', webpage, 'title', default=None)
        if not title:
            jsonld = None
            for jld in re.finditer(
                    r'(?is)<span\s*class="com-m-Thumbnail__image">(?:</span>)?<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
                    webpage):
                jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
                if jsonld:
                    break
            if jsonld:
                title = jsonld.get('caption')
        if not title and video_type == 'now-on-air':
            if not self._TIMETABLE:
                # cache the timetable because it goes to 5MiB in size (!!)
                self._TIMETABLE = self._download_json(
                    'https://api.abema.io/v1/timetable/dataSet?debug=false', video_id,
                    headers=headers)
            now = time_seconds(hours=9)
            for slot in self._TIMETABLE.get('slots', []):
                if slot.get('channelId') != video_id:
                    continue
                if slot['startAt'] <= now < slot['endAt']:
                    title = slot['title']
                    break

        # read breadcrumb on top of page
        breadcrumb = self._extract_breadcrumb_list(webpage, video_id)
        if breadcrumb:
            # breadcrumb list translates to: (e.g. 1st test for this IE)
            # Home > Anime (genre) > Isekai Shokudo 2 (series name) > Episode 1 "Cheese cakes" "Morning again" (episode title)
            # hence this works
            if len(breadcrumb) >= 2:
                # a single-item breadcrumb has no series entry to read
                info['series'] = breadcrumb[-2]
            info['episode'] = breadcrumb[-1]
            if not title:
                title = info['episode']

        description = self._html_search_regex(
            (r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
             r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',),
            webpage, 'description', default=None, group=1)
        if not description:
            og_desc = self._html_search_meta(
                ('description', 'og:description', 'twitter:description'), webpage)
            if og_desc:
                description = re.sub(r'''(?sx)
                    ^(.+?)(?:
                        アニメの動画を無料で見るならABEMA！| # anime
                        等、.+ # applies for most of categories
                    )?
                ''', r'\1', og_desc)

        # canonical URL may contain series and episode number
        mobj = re.search(r's(\d+)_p(\d+)$', canonical_url)
        if mobj:
            seri = int_or_none(mobj.group(1), default=float('inf'))
            epis = int_or_none(mobj.group(2), default=float('inf'))
            info['series_number'] = seri if seri < 100 else None
            # some anime like Detective Conan (though not available in AbemaTV)
            # has more than 1000 episodes (1026 as of 2021/11/15)
            info['episode_number'] = epis if epis < 2000 else None

        is_live, m3u8_url = False, None
        if video_type == 'now-on-air':
            is_live = True
            channel_url = 'https://api.abema.io/v1/channels'
            if video_id == 'news-global':
                channel_url = update_url_query(channel_url, {'division': '1'})
            onair_channels = self._download_json(channel_url, video_id)
            for ch in onair_channels['channels']:
                if video_id == ch['id']:
                    m3u8_url = ch['playback']['hls']
                    break
            else:
                raise ExtractorError(f'Cannot find on-air {video_id} channel.', expected=True)
        elif video_type == 'episode':
            api_response = self._download_json(
                f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
                note='Checking playability',
                headers=headers)
            ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'))
            if 3 not in ondemand_types:
                # cannot acquire decryption key for these streams
                self.report_warning('This is a premium-only stream')
            # API metadata takes precedence over what was scraped from the page
            info.update(traverse_obj(api_response, {
                'series': ('series', 'title'),
                'season': ('season', 'title'),
                'season_number': ('season', 'sequence'),
                'episode_number': ('episode', 'number'),
            }))
            if not title:
                title = traverse_obj(api_response, ('episode', 'title'))
            if not description:
                description = traverse_obj(api_response, ('episode', 'content'))

            m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
        elif video_type == 'slots':
            api_response = self._download_json(
                f'https://api.abema.io/v1/media/slots/{video_id}', video_id,
                note='Checking playability',
                headers=headers)
            if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
                self.report_warning('This is a premium-only stream')

            m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
        else:
            # _VALID_URL only admits the three types handled above
            raise ExtractorError('Unreachable')

        if is_live:
            self.report_warning("This is a livestream; yt-dlp doesn't support downloading natively, but FFmpeg cannot handle m3u8 manifests from AbemaTV")
            self.report_warning('Please consider using Streamlink to download these streams (https://github.com/streamlink/streamlink)')
        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, ext='mp4', live=is_live)

        info.update({
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'is_live': is_live,
        })
        return info


class AbemaTVTitleIE(AbemaTVBaseIE):
    """Playlist extractor for AbemaTV series pages (``/video/title/<id>``)."""
    _VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'
    _PAGE_SIZE = 25

    _TESTS = [{
        'url': 'https://abema.tv/video/title/90-1597',
        'info_dict': {
            'id': '90-1597',
            'title': 'シャッフルアイランド',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://abema.tv/video/title/193-132',
        'info_dict': {
            'id': '193-132',
            'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
        },
        'playlist_mincount': 16,
    }, {
        'url': 'https://abema.tv/video/title/25-102',
        'info_dict': {
            'id': '25-102',
            'title': 'ソードアート・オンライン アリシゼーション',
        },
        'playlist_mincount': 24,
    }]

    def _fetch_page(self, playlist_id, series_version, page):
        """Yield one URL result per program on the zero-based ``page`` of the series."""
        first_item = page * self._PAGE_SIZE
        listing = self._call_api(
            f'v1/video/series/{playlist_id}/programs', playlist_id,
            query={
                'seriesVersion': series_version,
                'offset': str(first_item),
                'order': 'seq',
                'limit': str(self._PAGE_SIZE),
            },
            note=f'Downloading page {page + 1}')
        for program_id in traverse_obj(listing, ('programs', ..., 'id')):
            yield self.url_result(f'https://abema.tv/video/episode/{program_id}')

    def _entries(self, playlist_id, series_version):
        """Return a paged list that fetches the series' programs page by page on demand."""
        def fetch(page):
            return self._fetch_page(playlist_id, series_version, page)

        return OnDemandPagedList(fetch, self._PAGE_SIZE)

    def _real_extract(self, url):
        """Return the series at ``url`` as a playlist of episode URLs."""
        playlist_id = self._match_id(url)
        series = self._call_api(f'v1/video/series/{playlist_id}', playlist_id)
        entries = self._entries(playlist_id, series['version'])

        return self.playlist_result(
            entries, playlist_id=playlist_id,
            playlist_title=series.get('title'),
            playlist_description=series.get('content'))
Commit	Line	Data
f8271158	1	import base64
f8271158	2	import binascii
bc83b4b0	3	import functools
3e9b66d7 LNO	4	import hashlib
3e9b66d7 LNO	5	import hmac
f8271158	6	import io
f8271158	7	import json
3e9b66d7 LNO	8	import re
3e9b66d7 LNO	9	import struct
f8271158	10	import time
14f25df2	11	import urllib.parse
ac668111	12	import urllib.request
f9934b96	13	import urllib.response
f9934b96	14	import uuid
3e9b66d7	15
3e9b66d7 LNO	16	from .common import InfoExtractor
3e9b66d7 LNO	17	from ..aes import aes_ecb_decrypt
3e9b66d7 LNO	18	from ..utils import (
3e9b66d7 LNO	19	ExtractorError,
f8271158	20	bytes_to_intlist,
7b2c3f47	21	decode_base_n,
3e9b66d7	22	int_or_none,
f8271158	23	intlist_to_bytes,
bc83b4b0	24	OnDemandPagedList,
3e9b66d7	25	time_seconds,
3e9b66d7	26	traverse_obj,
f8271158	27	update_url_query,
3e9b66d7 LNO	28	)
3e9b66d7 LNO	29
3e9b66d7	30
9f662472	31	def add_opener(ydl, handler): # FIXME: Create proper API in .networking
9f662472	32	"""Add a handler for opening URLs, like _download_webpage"""
3e9b66d7 LNO	33	# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
3e9b66d7 LNO	34	# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
9f662472	35	rh = ydl._request_director.handlers['Urllib']
	36	if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
	37	return
	38	opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=ydl.proxies)
	39	assert isinstance(opener, urllib.request.OpenerDirector)
	40	opener.add_handler(handler)
	41	rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')
3e9b66d7 LNO	42
3e9b66d7 LNO	43
ac668111	44	class AbemaLicenseHandler(urllib.request.BaseHandler):
3e9b66d7 LNO	45	handler_order = 499
	46	STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
	47	HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
	48
	49	def __init__(self, ie: 'AbemaTVIE'):
962ffcf8	50	# the protocol that this should really handle is 'abematv-license://'
3e9b66d7 LNO	51	# abematv_license_open is just a placeholder for development purposes
	52	# ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
	53	setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open'))
	54	self.ie = ie
	55
	56	def _get_videokey_from_ticket(self, ticket):
9809740b	57	to_show = self.ie.get_param('verbose', False)
3e9b66d7 LNO	58	media_token = self.ie._get_media_token(to_show=to_show)
	59
	60	license_response = self.ie._download_json(
	61	'https://license.abema.io/abematv-hls', None, note='Requesting playback license' if to_show else False,
	62	query={'t': media_token},
	63	data=json.dumps({
	64	'kv': 'a',
	65	'lt': ticket
	66	}).encode('utf-8'),
	67	headers={
	68	'Content-Type': 'application/json',
	69	})
	70
7b2c3f47	71	res = decode_base_n(license_response['k'], table=self.STRTABLE)
3e9b66d7 LNO	72	encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
	73
	74	h = hmac.new(
f8271158	75	binascii.unhexlify(self.HKEY),
3e9b66d7 LNO	76	(license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'),
	77	digestmod=hashlib.sha256)
	78	enckey = bytes_to_intlist(h.digest())
	79
	80	return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
	81
	82	def abematv_license_open(self, url):
3d2623a8	83	url = url.get_full_url() if isinstance(url, urllib.request.Request) else url
14f25df2	84	ticket = urllib.parse.urlparse(url).netloc
3e9b66d7	85	response_data = self._get_videokey_from_ticket(ticket)
f9934b96	86	return urllib.response.addinfourl(io.BytesIO(response_data), headers={
9f662472	87	'Content-Length': str(len(response_data)),
3e9b66d7 LNO	88	}, url=url, code=200)
	89
	90
	91	class AbemaTVBaseIE(InfoExtractor):
3e9b66d7 LNO	92	_USERTOKEN = None
3e9b66d7 LNO	93	_DEVICE_ID = None
3e9b66d7 LNO	94	_MEDIATOKEN = None
	95
	96	_SECRETKEY = b'v+Gjs=25Aw5erR!J8ZuvRrCx*rGswhB&qdHd_SYerEWdU&a?3DzN9BRbp5KwY4hEmcj5#fykMjJ=AuWz5GSMY-d@H7DMEh3M@9n2G552Us$$k9cD=3TxwWe86!x#Zyhe'
	97
bc83b4b0 L	98	@classmethod
bc83b4b0 L	99	def _generate_aks(cls, deviceid):
3e9b66d7 LNO	100	deviceid = deviceid.encode('utf-8')
3e9b66d7 LNO	101	# add 1 hour and then drop minute and secs
a4f16832	102	ts_1hour = int((time_seconds() // 3600 + 1) * 3600)
3e9b66d7 LNO	103	time_struct = time.gmtime(ts_1hour)
	104	ts_1hour_str = str(ts_1hour).encode('utf-8')
	105
	106	tmp = None
	107
	108	def mix_once(nonce):
	109	nonlocal tmp
bc83b4b0	110	h = hmac.new(cls._SECRETKEY, digestmod=hashlib.sha256)
3e9b66d7 LNO	111	h.update(nonce)
	112	tmp = h.digest()
	113
	114	def mix_tmp(count):
	115	nonlocal tmp
	116	for i in range(count):
	117	mix_once(tmp)
	118
	119	def mix_twist(nonce):
	120	nonlocal tmp
f8271158	121	mix_once(base64.urlsafe_b64encode(tmp).rstrip(b'=') + nonce)
3e9b66d7	122
bc83b4b0	123	mix_once(cls._SECRETKEY)
3e9b66d7 LNO	124	mix_tmp(time_struct.tm_mon)
	125	mix_twist(deviceid)
	126	mix_tmp(time_struct.tm_mday % 5)
	127	mix_twist(ts_1hour_str)
	128	mix_tmp(time_struct.tm_hour % 5)
	129
f8271158	130	return base64.urlsafe_b64encode(tmp).rstrip(b'=').decode('utf-8')
3e9b66d7 LNO	131
	132	def _get_device_token(self):
	133	if self._USERTOKEN:
	134	return self._USERTOKEN
	135
a4f16832 L	136	username, _ = self._get_login_info()
	137	AbemaTVBaseIE._USERTOKEN = username and self.cache.load(self._NETRC_MACHINE, username)
	138	if AbemaTVBaseIE._USERTOKEN:
	139	# try authentication with locally stored token
	140	try:
	141	self._get_media_token(True)
	142	return
	143	except ExtractorError as e:
	144	self.report_warning(f'Failed to login with cached user token; obtaining a fresh one ({e})')
	145
bc83b4b0	146	AbemaTVBaseIE._DEVICE_ID = str(uuid.uuid4())
3e9b66d7 LNO	147	aks = self._generate_aks(self._DEVICE_ID)
	148	user_data = self._download_json(
	149	'https://api.abema.io/v1/users', None, note='Authorizing',
	150	data=json.dumps({
	151	'deviceId': self._DEVICE_ID,
	152	'applicationKeySecret': aks,
	153	}).encode('utf-8'),
	154	headers={
	155	'Content-Type': 'application/json',
	156	})
bc83b4b0	157	AbemaTVBaseIE._USERTOKEN = user_data['token']
3e9b66d7	158
3e9b66d7	159	add_opener(self._downloader, AbemaLicenseHandler(self))
3e9b66d7 LNO	160	return self._USERTOKEN
	161
	162	def _get_media_token(self, invalidate=False, to_show=True):
	163	if not invalidate and self._MEDIATOKEN:
	164	return self._MEDIATOKEN
	165
bc83b4b0	166	AbemaTVBaseIE._MEDIATOKEN = self._download_json(
3e9b66d7 LNO	167	'https://api.abema.io/v1/media/token', None, note='Fetching media token' if to_show else False,
	168	query={
	169	'osName': 'android',
	170	'osVersion': '6.0.1',
	171	'osLang': 'ja_JP',
	172	'osTimezone': 'Asia/Tokyo',
	173	'appId': 'tv.abema',
	174	'appVersion': '3.27.1'
	175	}, headers={
bc83b4b0	176	'Authorization': f'bearer {self._get_device_token()}',
3e9b66d7 LNO	177	})['token']
	178
	179	return self._MEDIATOKEN
	180
bc83b4b0 L	181	def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'):
	182	return self._download_json(
	183	f'https://api.abema.io/{endpoint}', video_id, query=query or {},
	184	note=note,
	185	headers={
	186	'Authorization': f'bearer {self._get_device_token()}',
	187	})
	188
	189	def _extract_breadcrumb_list(self, webpage, video_id):
	190	for jld in re.finditer(
	191	r'(?is)</span></li></ul><script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
	192	webpage):
	193	jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
	194	if traverse_obj(jsonld, '@type') != 'BreadcrumbList':
	195	continue
	196	items = traverse_obj(jsonld, ('itemListElement', ..., 'name'))
	197	if items:
	198	return items
	199	return []
	200
	201
	202	class AbemaTVIE(AbemaTVBaseIE):
	203	_VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air\|video/episode\|channels/.+?/slots)/(?P<id>[^?/]+)'
	204	_NETRC_MACHINE = 'abematv'
	205	_TESTS = [{
	206	'url': 'https://abema.tv/video/episode/194-25_s2_p1',
	207	'info_dict': {
	208	'id': '194-25_s2_p1',
	209	'title': '第1話「チーズケーキ」　「モーニング再び」',
	210	'series': '異世界食堂２',
	211	'series_number': 2,
	212	'episode': '第1話「チーズケーキ」　「モーニング再び」',
	213	'episode_number': 1,
	214	},
	215	'skip': 'expired',
	216	}, {
	217	'url': 'https://abema.tv/channels/anime-live2/slots/E8tvAnMJ7a9a5d',
	218	'info_dict': {
	219	'id': 'E8tvAnMJ7a9a5d',
	220	'title': 'ゆるキャン△ SEASON２全話一挙【無料ビデオ72時間】',
	221	'series': 'ゆるキャン△ SEASON２',
	222	'episode': 'ゆるキャン△ SEASON２全話一挙【無料ビデオ72時間】',
	223	'series_number': 2,
	224	'episode_number': 1,
	225	'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
	226	},
	227	'skip': 'expired',
	228	}, {
	229	'url': 'https://abema.tv/video/episode/87-877_s1282_p31047',
	230	'info_dict': {
	231	'id': 'E8tvAnMJ7a9a5d',
	232	'title': '第5話『光射す』',
	233	'description': 'md5:56d4fc1b4f7769ded5f923c55bb4695d',
	234	'thumbnail': r're:https://hayabusa\.io/.+',
	235	'series': '相棒',
	236	'episode': '第5話『光射す』',
	237	},
	238	'skip': 'expired',
	239	}, {
	240	'url': 'https://abema.tv/now-on-air/abema-anime',
	241	'info_dict': {
	242	'id': 'abema-anime',
	243	# this varies
	244	# 'title': '女子高生の無駄づかい全話一挙【無料ビデオ72時間】',
245	'description': 'md5:55f2e61f46a17e9230802d7bcc913d5f',
246	'is_live': True,
247	},
248	'skip': 'Not supported until yt-dlp implements native live downloader OR AbemaTV can start a local HTTP server',
249	}]
250	_TIMETABLE = None
251
52efa4b3	252	def _perform_login(self, username, password):
a4f16832 L	253	self._get_device_token()
	254	if self.cache.load(self._NETRC_MACHINE, username) and self._get_media_token():
	255	self.write_debug('Skipping logging in')
	256	return
	257
3e9b66d7 LNO	258	if '@' in username: # don't strictly check if it's email address or not
	259	ep, method = 'user/email', 'email'
	260	else:
	261	ep, method = 'oneTimePassword', 'userId'
	262
	263	login_response = self._download_json(
	264	f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
	265	data=json.dumps({
	266	method: username,
	267	'password': password
	268	}).encode('utf-8'), headers={
bc83b4b0	269	'Authorization': f'bearer {self._get_device_token()}',
3e9b66d7 LNO	270	'Origin': 'https://abema.tv',
	271	'Referer': 'https://abema.tv/',
	272	'Content-Type': 'application/json',
	273	})
	274
bc83b4b0	275	AbemaTVBaseIE._USERTOKEN = login_response['token']
3e9b66d7	276	self._get_media_token(True)
a4f16832	277	self.cache.store(self._NETRC_MACHINE, username, AbemaTVBaseIE._USERTOKEN)
3e9b66d7 LNO	278
	279	def _real_extract(self, url):
	280	# starting download using infojson from this extractor is undefined behavior,
962ffcf8	281	# and never be fixed in the future; you must trigger downloads by directly specifying URL.
3e9b66d7 LNO	282	# (unless there's a way to hook before downloading by extractor)
	283	video_id, video_type = self._match_valid_url(url).group('id', 'type')
	284	headers = {
	285	'Authorization': 'Bearer ' + self._get_device_token(),
	286	}
	287	video_type = video_type.split('/')[-1]
	288
	289	webpage = self._download_webpage(url, video_id)
	290	canonical_url = self._search_regex(
	291	r'<link\s+rel="canonical"\s*href="(.+?)"', webpage, 'canonical URL',
	292	default=url)
	293	info = self._search_json_ld(webpage, video_id, default={})
	294
	295	title = self._search_regex(
	296	r'<span\s*class=".+?EpisodeTitleBlock__title">(.+?)</span>', webpage, 'title', default=None)
	297	if not title:
	298	jsonld = None
	299	for jld in re.finditer(
	300	r'(?is)<span\sclass="com-m-Thumbnail__image">(?:</span>)?<script[^>]+type=(["\']?)application/ld\+json\1[^>]>(?P<json_ld>.+?)</script>',
	301	webpage):
	302	jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
	303	if jsonld:
	304	break
	305	if jsonld:
	306	title = jsonld.get('caption')
	307	if not title and video_type == 'now-on-air':
	308	if not self._TIMETABLE:
	309	# cache the timetable because it goes to 5MiB in size (!!)
	310	self._TIMETABLE = self._download_json(
	311	'https://api.abema.io/v1/timetable/dataSet?debug=false', video_id,
	312	headers=headers)
	313	now = time_seconds(hours=9)
	314	for slot in self._TIMETABLE.get('slots', []):
	315	if slot.get('channelId') != video_id:
	316	continue
	317	if slot['startAt'] <= now and now < slot['endAt']:
	318	title = slot['title']
	319	break
	320
	321	# read breadcrumb on top of page
	322	breadcrumb = self._extract_breadcrumb_list(webpage, video_id)
	323	if breadcrumb:
62b58c09	324	# breadcrumb list translates to: (e.g. 1st test for this IE)
3e9b66d7 LNO	325	# Home > Anime (genre) > Isekai Shokudo 2 (series name) > Episode 1 "Cheese cakes" "Morning again" (episode title)
	326	# hence this works
	327	info['series'] = breadcrumb[-2]
	328	info['episode'] = breadcrumb[-1]
	329	if not title:
	330	title = info['episode']
	331
	332	description = self._html_search_regex(
	333	(r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
	334	r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',),
	335	webpage, 'description', default=None, group=1)
	336	if not description:
	337	og_desc = self._html_search_meta(
	338	('description', 'og:description', 'twitter:description'), webpage)
	339	if og_desc:
	340	description = re.sub(r'''(?sx)
	341	^(.+?)(?:
	342	アニメの動画を無料で見るならABEMA！\| # anime
	343	等、.+ # applies for most of categories
	344	)?
	345	''', r'\1', og_desc)
	346
	347	# canonical URL may contain series and episode number
	348	mobj = re.search(r's(\d+)_p(\d+)$', canonical_url)
	349	if mobj:
	350	seri = int_or_none(mobj.group(1), default=float('inf'))
	351	epis = int_or_none(mobj.group(2), default=float('inf'))
	352	info['series_number'] = seri if seri < 100 else None
	353	# some anime like Detective Conan (though not available in AbemaTV)
	354	# has more than 1000 episodes (1026 as of 2021/11/15)
	355	info['episode_number'] = epis if epis < 2000 else None
	356
	357	is_live, m3u8_url = False, None
	358	if video_type == 'now-on-air':
	359	is_live = True
	360	channel_url = 'https://api.abema.io/v1/channels'
	361	if video_id == 'news-global':
	362	channel_url = update_url_query(channel_url, {'division': '1'})
	363	onair_channels = self._download_json(channel_url, video_id)
	364	for ch in onair_channels['channels']:
	365	if video_id == ch['id']:
	366	m3u8_url = ch['playback']['hls']
	367	break
	368	else:
	369	raise ExtractorError(f'Cannot find on-air {video_id} channel.', expected=True)
	370	elif video_type == 'episode':
	371	api_response = self._download_json(
	372	f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
	373	note='Checking playability',
	374	headers=headers)
6839ae1f	375	ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'))
3e9b66d7 LNO	376	if 3 not in ondemand_types:
	377	# cannot acquire decryption key for these streams
	378	self.report_warning('This is a premium-only stream')
c449c065 L	379	info.update(traverse_obj(api_response, {
	380	'series': ('series', 'title'),
	381	'season': ('season', 'title'),
	382	'season_number': ('season', 'sequence'),
	383	'episode_number': ('episode', 'number'),
	384	}))
	385	if not title:
	386	title = traverse_obj(api_response, ('episode', 'title'))
	387	if not description:
	388	description = traverse_obj(api_response, ('episode', 'content'))
3e9b66d7 LNO	389
	390	m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
	391	elif video_type == 'slots':
	392	api_response = self._download_json(
	393	f'https://api.abema.io/v1/media/slots/{video_id}', video_id,
	394	note='Checking playability',
	395	headers=headers)
	396	if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
	397	self.report_warning('This is a premium-only stream')
	398
	399	m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
	400	else:
	401	raise ExtractorError('Unreachable')
	402
	403	if is_live:
	404	self.report_warning("This is a livestream; yt-dlp doesn't support downloading natively, but FFmpeg cannot handle m3u8 manifests from AbemaTV")
	405	self.report_warning('Please consider using Streamlink to download these streams (https://github.com/streamlink/streamlink)')
	406	formats = self._extract_m3u8_formats(
	407	m3u8_url, video_id, ext='mp4', live=is_live)
	408
	409	info.update({
	410	'id': video_id,
	411	'title': title,
	412	'description': description,
	413	'formats': formats,
	414	'is_live': is_live,
	415	})
	416	return info
	417
	418
	419	class AbemaTVTitleIE(AbemaTVBaseIE):
	420	_VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'
bc83b4b0	421	_PAGE_SIZE = 25
3e9b66d7 LNO	422
	423	_TESTS = [{
	424	'url': 'https://abema.tv/video/title/90-1597',
	425	'info_dict': {
	426	'id': '90-1597',
	427	'title': 'シャッフルアイランド',
	428	},
	429	'playlist_mincount': 2,
	430	}, {
	431	'url': 'https://abema.tv/video/title/193-132',
	432	'info_dict': {
	433	'id': '193-132',
	434	'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
	435	},
	436	'playlist_mincount': 16,
bc83b4b0 L	437	}, {
	438	'url': 'https://abema.tv/video/title/25-102',
	439	'info_dict': {
	440	'id': '25-102',
	441	'title': 'ソードアート・オンラインアリシゼーション',
	442	},
	443	'playlist_mincount': 24,
3e9b66d7 LNO	444	}]
3e9b66d7 LNO	445
bc83b4b0 L	446	def _fetch_page(self, playlist_id, series_version, page):
	447	programs = self._call_api(
	448	f'v1/video/series/{playlist_id}/programs', playlist_id,
	449	note=f'Downloading page {page + 1}',
	450	query={
	451	'seriesVersion': series_version,
	452	'offset': str(page * self._PAGE_SIZE),
	453	'order': 'seq',
	454	'limit': str(self._PAGE_SIZE),
	455	})
	456	yield from (
	457	self.url_result(f'https://abema.tv/video/episode/{x}')
6839ae1f	458	for x in traverse_obj(programs, ('programs', ..., 'id')))
3e9b66d7	459
bc83b4b0 L	460	def _entries(self, playlist_id, series_version):
	461	return OnDemandPagedList(
	462	functools.partial(self._fetch_page, playlist_id, series_version),
	463	self._PAGE_SIZE)
3e9b66d7	464
bc83b4b0 L	465	def _real_extract(self, url):
	466	playlist_id = self._match_id(url)
	467	series_info = self._call_api(f'v1/video/series/{playlist_id}', playlist_id)
3e9b66d7	468
bc83b4b0 L	469	return self.playlist_result(
	470	self._entries(playlist_id, series_info['version']), playlist_id=playlist_id,
	471	playlist_title=series_info.get('title'),
	472	playlist_description=series_info.get('content'))