[yt-dlp.git] / yt_dlp / extractor / abematv.py

import base64
import binascii
import hashlib
import hmac
import io
import json
import re
import struct
import time
import urllib.parse
import urllib.request
import urllib.response
import uuid

from .common import InfoExtractor
from ..aes import aes_ecb_decrypt
from ..utils import (
    ExtractorError,
    bytes_to_intlist,
    decode_base_n,
    int_or_none,
    intlist_to_bytes,
    request_to_url,
    time_seconds,
    traverse_obj,
    update_url_query,
    urljoin,
)

# NOTE: the network-handler related code below is a temporary measure until the network stack overhaul PRs are merged (#2861/#2862)


def add_opener(ydl, handler):
    ''' Register a urllib handler on the opener owned by ydl (the opener used for requests such as _download_webpage) '''
    # Delegates to OpenerDirector.add_handler; for the registration rules see
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
    director = ydl._opener
    assert isinstance(director, urllib.request.OpenerDirector)
    director.add_handler(handler)


def remove_opener(ydl, handler):
    '''
    Remove handler(s) for opening URLs
    @param ydl     Object whose ``_opener`` attribute is the urllib.request.OpenerDirector to modify.
    @param handler Either handler object itself or handler type (or tuple of types).
    Specifying handler type will remove all handler which isinstance returns True.

    This is the inverse of OpenerDirector.add_handler. Instead of re-deriving
    the lookup keys from dir(handler), the opener's own lookup tables are scanned,
    so that handlers are found even when:
      * the handling method exists only on the instance (e.g. an attribute such as
        'abematv-license_open' installed with setattr in __init__) while a *type*
        is passed here, or
      * a tuple of types is passed.
    No empty entries are added to the opener's tables as a side effect.
    '''
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
    opener = ydl._opener
    assert isinstance(opener, urllib.request.OpenerDirector)
    if isinstance(handler, (type, tuple)):
        def find_cp(x):
            return isinstance(x, handler)
    else:
        def find_cp(x):
            return x is handler

    # handle_open / process_request / process_response map kind -> [handlers];
    # handle_error maps protocol -> {kind -> [handlers]}, so its values have the same shape
    tables = [opener.handle_open, opener.process_request, opener.process_response]
    tables.extend(opener.handle_error.values())
    for table in tables:
        for chain in table.values():
            if any(find_cp(x) for x in chain):
                # mutate in place so that the opener keeps referring to the same list
                chain[:] = [x for x in chain if not find_cp(x)]

    # detach the removed handlers from the opener before dropping them
    for x in opener.handlers:
        if find_cp(x):
            x.add_parent(None)
    opener.handlers[:] = [x for x in opener.handlers if not find_cp(x)]


class AbemaLicenseHandler(urllib.request.BaseHandler):
    '''
    urllib handler that answers 'abematv-license://<ticket>' URLs with the
    decrypted video key obtained for that ticket.
    '''
    handler_order = 499
    # alphabet passed to decode_base_n for the 'k' field of the license response
    STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
    # hex-encoded HMAC key used when deriving the key that decrypts the video key
    HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'

    def __init__(self, ie: 'AbemaTVIE'):
        # The protocol this handler really serves is 'abematv-license://'.
        # A method cannot be spelled with a hyphen, so abematv_license_open is
        # defined normally and aliased under the hyphenated name here.
        # ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
        setattr(self, 'abematv-license_open', self.abematv_license_open)
        self.ie = ie

    def _get_videokey_from_ticket(self, ticket):
        ''' Request the playback license for ticket and return the decrypted video key as bytes '''
        verbose = self.ie.get_param('verbose', False)
        media_token = self.ie._get_media_token(to_show=verbose)

        request_body = json.dumps({
            'kv': 'a',
            'lt': ticket
        }).encode('utf-8')
        license_response = self.ie._download_json(
            'https://license.abema.io/abematv-hls', None,
            note='Requesting playback license' if verbose else False,
            query={'t': media_token},
            data=request_body,
            headers={
                'Content-Type': 'application/json',
            })

        # 'k' decodes to an integer which is packed as two big-endian 64-bit words
        key_number = decode_base_n(license_response['k'], table=self.STRTABLE)
        packed_key = struct.pack('>QQ', key_number >> 64, key_number & 0xffffffffffffffff)
        encvideokey = bytes_to_intlist(packed_key)

        # the AES key is HMAC-SHA256(HKEY, cid + device id)
        hmac_message = (license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8')
        mac = hmac.new(binascii.unhexlify(self.HKEY), hmac_message, digestmod=hashlib.sha256)
        enckey = bytes_to_intlist(mac.digest())

        return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))

    def abematv_license_open(self, url):
        ''' Serve the video key for the ticket found in the netloc part of url '''
        url = request_to_url(url)
        ticket = urllib.parse.urlparse(url).netloc
        response_data = self._get_videokey_from_ticket(ticket)
        response_headers = {
            'Content-Length': len(response_data),
        }
        return urllib.response.addinfourl(
            io.BytesIO(response_data), headers=response_headers, url=url, code=200)


class AbemaTVBaseIE(InfoExtractor):
    def _extract_breadcrumb_list(self, webpage, video_id):
        '''
        Return the 'name' values of the first usable BreadcrumbList JSON-LD found
        in webpage, or an empty list when there is none.
        '''
        candidates = re.finditer(
            r'(?is)</span></li></ul><script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
            webpage)
        for candidate in candidates:
            jsonld = self._parse_json(candidate.group('json_ld'), video_id, fatal=False)
            # skip unparsable/empty blobs and JSON-LD of any other type
            if not jsonld or jsonld.get('@type') != 'BreadcrumbList':
                continue
            names = traverse_obj(jsonld, ('itemListElement', ..., 'name'))
            if names:
                return names
        return []


class AbemaTVIE(AbemaTVBaseIE):
    '''Extractor for abema.tv now-on-air channels, video episodes and channel slots (see _VALID_URL).'''
    # 'type' captures the kind of page (its last path component is used in _real_extract),
    # 'id' captures the channel/episode/slot identifier
    _VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
    _NETRC_MACHINE = 'abematv'
    _TESTS = [{
        'url': 'https://abema.tv/video/episode/194-25_s2_p1',
        'info_dict': {
            'id': '194-25_s2_p1',
            'title': '第1話 「チーズケーキ」　「モーニング再び」',
            'series': '異世界食堂２',
            'series_number': 2,
            'episode': '第1話 「チーズケーキ」　「モーニング再び」',
            'episode_number': 1,
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/channels/anime-live2/slots/E8tvAnMJ7a9a5d',
        'info_dict': {
            'id': 'E8tvAnMJ7a9a5d',
            'title': 'ゆるキャン△ SEASON２ 全話一挙【無料ビデオ72時間】',
            'series': 'ゆるキャン△ SEASON２',
            'episode': 'ゆるキャン△ SEASON２ 全話一挙【無料ビデオ72時間】',
            'series_number': 2,
            'episode_number': 1,
            'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/video/episode/87-877_s1282_p31047',
        'info_dict': {
            # NOTE(review): this id equals the slot id of the previous test rather than
            # the id in the URL; looks like a copy-paste leftover -- confirm
            'id': 'E8tvAnMJ7a9a5d',
            'title': '第5話『光射す』',
            'description': 'md5:56d4fc1b4f7769ded5f923c55bb4695d',
            'thumbnail': r're:https://hayabusa\.io/.+',
            'series': '相棒',
            'episode': '第5話『光射す』',
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/now-on-air/abema-anime',
        'info_dict': {
            'id': 'abema-anime',
            # this varies
            # 'title': '女子高生の無駄づかい 全話一挙【無料ビデオ72時間】',
            'description': 'md5:55f2e61f46a17e9230802d7bcc913d5f',
            'is_live': True,
        },
        'skip': 'Not supported until yt-dlp implements native live downloader OR AbemaTV can start a local HTTP server',
    }]
    # Cached state, filled lazily by the methods of this class:
    # token from /v1/users (_get_device_token), replaced by the login token in _perform_login
    _USERTOKEN = None
    # random UUID generated in _get_device_token; also read by AbemaLicenseHandler
    _DEVICE_ID = None
    # timetable data set downloaded in _real_extract for now-on-air titles
    _TIMETABLE = None
    # token from /v1/media/token (_get_media_token)
    _MEDIATOKEN = None

    # HMAC key used by _generate_aks
    _SECRETKEY = b'v+Gjs=25Aw5erR!J8ZuvRrCx*rGswhB&qdHd_SYerEWdU&a?3DzN9BRbp5KwY4hEmcj5#fykMjJ=AuWz5GSMY-d@H7DMEh3M@9n2G552Us$$k9cD=3TxwWe86!x#Zyhe'

    def _generate_aks(self, deviceid):
        '''
        Derive the 'applicationKeySecret' string for deviceid.

        The value is a chain of HMAC-SHA256 digests keyed with _SECRETKEY whose
        number of rounds depends on the month, day and hour of the next full hour
        (computed from time_seconds(hours=9)); the final digest is returned as
        unpadded URL-safe base64 text.
        '''
        secret = self._SECRETKEY

        def digest_of(message):
            # one HMAC-SHA256 round keyed with the secret
            return hmac.new(secret, message, digestmod=hashlib.sha256).digest()

        def iterate(state, rounds):
            # feed the state back into the HMAC `rounds` times
            for _ in range(rounds):
                state = digest_of(state)
            return state

        def twist(state, nonce):
            # mix external data into the state via its unpadded base64 form
            return digest_of(base64.urlsafe_b64encode(state).rstrip(b'=') + nonce)

        device_bytes = deviceid.encode('utf-8')
        # add 1 hour and then drop minute and secs
        next_hour = int((time_seconds(hours=9) // 3600 + 1) * 3600)
        calendar = time.gmtime(next_hour)
        next_hour_bytes = str(next_hour).encode('utf-8')

        state = digest_of(secret)
        state = iterate(state, calendar.tm_mon)
        state = twist(state, device_bytes)
        state = iterate(state, calendar.tm_mday % 5)
        state = twist(state, next_hour_bytes)
        state = iterate(state, calendar.tm_hour % 5)

        return base64.urlsafe_b64encode(state).rstrip(b'=').decode('utf-8')

    def _get_device_token(self):
        '''
        Return the user token, registering a fresh random device first when no
        token is cached yet. Registration also (re)installs AbemaLicenseHandler
        on the downloader's opener.
        '''
        cached_token = self._USERTOKEN
        if cached_token:
            return cached_token

        device_id = str(uuid.uuid4())
        self._DEVICE_ID = device_id
        registration = json.dumps({
            'deviceId': device_id,
            'applicationKeySecret': self._generate_aks(device_id),
        }).encode('utf-8')
        user_data = self._download_json(
            'https://api.abema.io/v1/users', None, note='Authorizing',
            data=registration,
            headers={
                'Content-Type': 'application/json',
            })
        self._USERTOKEN = user_data['token']

        # never leave more than one license handler installed (the early return
        # above already guards against this, the removal is a second safety net)
        downloader = self._downloader
        remove_opener(downloader, AbemaLicenseHandler)
        add_opener(downloader, AbemaLicenseHandler(self))

        return self._USERTOKEN

    def _get_media_token(self, invalidate=False, to_show=True):
        '''
        Return the media token, downloading it when none is cached or when
        invalidate is true.
        @param invalidate Ignore the cached token and fetch a new one.
        @param to_show    Whether to print the progress note for the request.
        '''
        if self._MEDIATOKEN and not invalidate:
            return self._MEDIATOKEN

        progress_note = 'Fetching media token' if to_show else False
        client_info = {
            'osName': 'android',
            'osVersion': '6.0.1',
            'osLang': 'ja_JP',
            'osTimezone': 'Asia/Tokyo',
            'appId': 'tv.abema',
            'appVersion': '3.27.1'
        }
        auth_headers = {
            'Authorization': 'bearer ' + self._get_device_token()
        }
        token_response = self._download_json(
            'https://api.abema.io/v1/media/token', None, note=progress_note,
            query=client_info, headers=auth_headers)
        self._MEDIATOKEN = token_response['token']

        return self._MEDIATOKEN

    def _perform_login(self, username, password):
        '''
        Log in with an e-mail address (when username contains '@') or with a
        user ID otherwise, store the returned token as the user token and
        fetch a new media token with it.
        '''
        # don't strictly check if it's email address or not
        uses_email = '@' in username
        ep = 'user/email' if uses_email else 'oneTimePassword'
        method = 'email' if uses_email else 'userId'

        credentials = json.dumps({
            method: username,
            'password': password
        }).encode('utf-8')
        request_headers = {
            'Authorization': 'bearer ' + self._get_device_token(),
            'Origin': 'https://abema.tv',
            'Referer': 'https://abema.tv/',
            'Content-Type': 'application/json',
        }
        login_response = self._download_json(
            f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
            data=credentials, headers=request_headers)

        self._USERTOKEN = login_response['token']
        # the media token cached so far belongs to the previous user token
        self._get_media_token(invalidate=True)

    def _real_extract(self, url):
        '''
        Build the info dict for a now-on-air channel, a video episode or a slot.

        Metadata is collected from the webpage (JSON-LD, title/description markup,
        breadcrumb list, canonical URL); the m3u8 URL comes from the channel list
        for live channels and is derived from the id for episodes and slots.
        Raises ExtractorError when a live channel is not in the channel list.
        '''
        # starting download using infojson from this extractor is undefined behavior,
        # and never be fixed in the future; you must trigger downloads by directly specifing URL.
        # (unless there's a way to hook before downloading by extractor)
        video_id, video_type = self._match_valid_url(url).group('id', 'type')
        headers = {
            'Authorization': 'Bearer ' + self._get_device_token(),
        }
        # 'video/episode' -> 'episode', 'channels/<channel>/slots' -> 'slots'
        video_type = video_type.split('/')[-1]

        webpage = self._download_webpage(url, video_id)
        canonical_url = self._search_regex(
            r'<link\s+rel="canonical"\s*href="(.+?)"', webpage, 'canonical URL',
            default=url)
        info = self._search_json_ld(webpage, video_id, default={})

        # title sources, in order of preference: title markup, thumbnail JSON-LD
        # caption, current timetable slot (live only), breadcrumb (further below)
        title = self._search_regex(
            r'<span\s*class=".+?EpisodeTitleBlock__title">(.+?)</span>', webpage, 'title', default=None)
        if not title:
            jsonld = None
            for jld in re.finditer(
                    r'(?is)<span\s*class="com-m-Thumbnail__image">(?:</span>)?<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
                    webpage):
                jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
                if jsonld:
                    break
            if jsonld:
                title = jsonld.get('caption')
        if not title and video_type == 'now-on-air':
            if not self._TIMETABLE:
                # cache the timetable because it goes to 5MiB in size (!!)
                self._TIMETABLE = self._download_json(
                    'https://api.abema.io/v1/timetable/dataSet?debug=false', video_id,
                    headers=headers)
            now = time_seconds(hours=9)
            for slot in self._TIMETABLE.get('slots', []):
                if slot.get('channelId') != video_id:
                    continue
                if slot['startAt'] <= now < slot['endAt']:
                    title = slot['title']
                    break

        # read breadcrumb on top of page
        breadcrumb = self._extract_breadcrumb_list(webpage, video_id)
        # at least series and episode entries are needed; a shorter list used to
        # raise IndexError on breadcrumb[-2]
        if len(breadcrumb) >= 2:
            # breadcrumb list translates to: (example is 1st test for this IE)
            # Home > Anime (genre) > Isekai Shokudo 2 (series name) > Episode 1 "Cheese cakes" "Morning again" (episode title)
            # hence this works
            info['series'] = breadcrumb[-2]
            info['episode'] = breadcrumb[-1]
            if not title:
                title = info['episode']

        description = self._html_search_regex(
            (r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
             r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',),
            webpage, 'description', default=None, group=1)
        if not description:
            og_desc = self._html_search_meta(
                ('description', 'og:description', 'twitter:description'), webpage)
            if og_desc:
                # NOTE(review): the lazy group followed by an optional, unanchored
                # alternative looks like it only ever consumes the first character,
                # which would leave the boilerplate suffix in place -- confirm
                description = re.sub(r'''(?sx)
                    ^(.+?)(?:
                        アニメの動画を無料で見るならABEMA！| # anime
                        等、.+ # applies for most of categories
                    )?
                ''', r'\1', og_desc)

        # canonical URL may contain series and episode number
        mobj = re.search(r's(\d+)_p(\d+)$', canonical_url)
        if mobj:
            seri = int_or_none(mobj.group(1), default=float('inf'))
            epis = int_or_none(mobj.group(2), default=float('inf'))
            # larger values are treated as internal ids rather than real numbers
            info['series_number'] = seri if seri < 100 else None
            # some anime like Detective Conan (though not available in AbemaTV)
            # has more than 1000 episodes (1026 as of 2021/11/15)
            info['episode_number'] = epis if epis < 2000 else None

        is_live, m3u8_url = False, None
        if video_type == 'now-on-air':
            is_live = True
            channel_url = 'https://api.abema.io/v1/channels'
            if video_id == 'news-global':
                channel_url = update_url_query(channel_url, {'division': '1'})
            onair_channels = self._download_json(channel_url, video_id)
            for ch in onair_channels['channels']:
                if video_id == ch['id']:
                    m3u8_url = ch['playback']['hls']
                    break
            else:
                raise ExtractorError(f'Cannot find on-air {video_id} channel.', expected=True)
        elif video_type == 'episode':
            api_response = self._download_json(
                f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
                note='Checking playability',
                headers=headers)
            ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'), default=[])
            if 3 not in ondemand_types:
                # cannot acquire decryption key for these streams
                self.report_warning('This is a premium-only stream')

            m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
        elif video_type == 'slots':
            api_response = self._download_json(
                f'https://api.abema.io/v1/media/slots/{video_id}', video_id,
                note='Checking playability',
                headers=headers)
            if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
                self.report_warning('This is a premium-only stream')

            m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
        else:
            # _VALID_URL only admits the three types handled above
            raise ExtractorError('Unreachable')

        if is_live:
            self.report_warning("This is a livestream; yt-dlp doesn't support downloading natively, but FFmpeg cannot handle m3u8 manifests from AbemaTV")
            self.report_warning('Please consider using Streamlink to download these streams (https://github.com/streamlink/streamlink)')
        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, ext='mp4', live=is_live)

        info.update({
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'is_live': is_live,
        })
        return info


class AbemaTVTitleIE(AbemaTVBaseIE):
    '''Extractor for abema.tv title pages; yields a playlist of the episode links found on the page.'''
    _VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'

    _TESTS = [{
        'url': 'https://abema.tv/video/title/90-1597',
        'info_dict': {
            'id': '90-1597',
            'title': 'シャッフルアイランド',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://abema.tv/video/title/193-132',
        'info_dict': {
            'id': '193-132',
            'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
        },
        'playlist_mincount': 16,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # the last breadcrumb entry is used as the playlist title
        breadcrumb = self._extract_breadcrumb_list(webpage, video_id)
        playlist_title = breadcrumb[-1] if breadcrumb else None

        # every episode list item links to an episode page by absolute path
        entries = []
        for link in re.finditer(r'<li\s*class=".+?EpisodeList.+?"><a\s*href="(/[^"]+?)"', webpage):
            episode_url = urljoin('https://abema.tv/', link.group(1))
            entries.append(self.url_result(episode_url))

        return self.playlist_result(entries, playlist_title=playlist_title, playlist_id=video_id)
Commit	Line	Data
f8271158	1	import base64
f8271158	2	import binascii
3e9b66d7 LNO	3	import hashlib
3e9b66d7 LNO	4	import hmac
f8271158	5	import io
f8271158	6	import json
3e9b66d7 LNO	7	import re
3e9b66d7 LNO	8	import struct
f8271158	9	import time
14f25df2	10	import urllib.parse
ac668111	11	import urllib.request
f9934b96	12	import urllib.response
f9934b96	13	import uuid
3e9b66d7	14
3e9b66d7 LNO	15	from .common import InfoExtractor
3e9b66d7 LNO	16	from ..aes import aes_ecb_decrypt
3e9b66d7 LNO	17	from ..utils import (
3e9b66d7 LNO	18	ExtractorError,
f8271158	19	bytes_to_intlist,
7b2c3f47	20	decode_base_n,
3e9b66d7	21	int_or_none,
f8271158	22	intlist_to_bytes,
3e9b66d7 LNO	23	request_to_url,
3e9b66d7 LNO	24	time_seconds,
3e9b66d7	25	traverse_obj,
f8271158	26	update_url_query,
3e9b66d7 LNO	27	urljoin,
	28	)
	29
3e9b66d7 LNO	30	# NOTE: network handler related code is temporary thing until network stack overhaul PRs are merged (#2861/#2862)
3e9b66d7 LNO	31
e5a998f3	32
08d30158	33	def add_opener(ydl, handler):
3e9b66d7 LNO	34	''' Add a handler for opening URLs, like _download_webpage '''
	35	# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
	36	# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
ac668111	37	assert isinstance(ydl._opener, urllib.request.OpenerDirector)
08d30158	38	ydl._opener.add_handler(handler)
3e9b66d7 LNO	39
3e9b66d7 LNO	40
08d30158	41	def remove_opener(ydl, handler):
3e9b66d7 LNO	42	'''
	43	Remove handler(s) for opening URLs
	44	@param handler Either handler object itself or handler type.
	45	Specifying handler type will remove all handler which isinstance returns True.
	46	'''
	47	# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
	48	# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
08d30158	49	opener = ydl._opener
ac668111	50	assert isinstance(ydl._opener, urllib.request.OpenerDirector)
3e9b66d7 LNO	51	if isinstance(handler, (type, tuple)):
	52	find_cp = lambda x: isinstance(x, handler)
	53	else:
	54	find_cp = lambda x: x is handler
	55
	56	removed = []
	57	for meth in dir(handler):
	58	if meth in ["redirect_request", "do_open", "proxy_open"]:
	59	# oops, coincidental match
	60	continue
	61
	62	i = meth.find("_")
	63	protocol = meth[:i]
	64	condition = meth[i + 1:]
	65
	66	if condition.startswith("error"):
	67	j = condition.find("_") + i + 1
	68	kind = meth[j + 1:]
	69	try:
	70	kind = int(kind)
	71	except ValueError:
	72	pass
	73	lookup = opener.handle_error.get(protocol, {})
	74	opener.handle_error[protocol] = lookup
	75	elif condition == "open":
	76	kind = protocol
	77	lookup = opener.handle_open
	78	elif condition == "response":
	79	kind = protocol
	80	lookup = opener.process_response
	81	elif condition == "request":
	82	kind = protocol
	83	lookup = opener.process_request
	84	else:
	85	continue
	86
	87	handlers = lookup.setdefault(kind, [])
	88	if handlers:
	89	handlers[:] = [x for x in handlers if not find_cp(x)]
	90
	91	removed.append(x for x in handlers if find_cp(x))
	92
	93	if removed:
	94	for x in opener.handlers:
	95	if find_cp(x):
	96	x.add_parent(None)
	97	opener.handlers[:] = [x for x in opener.handlers if not find_cp(x)]
	98
	99
ac668111	100	class AbemaLicenseHandler(urllib.request.BaseHandler):
3e9b66d7 LNO	101	handler_order = 499
	102	STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
	103	HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
	104
	105	def __init__(self, ie: 'AbemaTVIE'):
	106	# the protcol that this should really handle is 'abematv-license://'
	107	# abematv_license_open is just a placeholder for development purposes
	108	# ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
	109	setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open'))
	110	self.ie = ie
	111
	112	def _get_videokey_from_ticket(self, ticket):
9809740b	113	to_show = self.ie.get_param('verbose', False)
3e9b66d7 LNO	114	media_token = self.ie._get_media_token(to_show=to_show)
	115
	116	license_response = self.ie._download_json(
	117	'https://license.abema.io/abematv-hls', None, note='Requesting playback license' if to_show else False,
	118	query={'t': media_token},
	119	data=json.dumps({
	120	'kv': 'a',
	121	'lt': ticket
	122	}).encode('utf-8'),
	123	headers={
	124	'Content-Type': 'application/json',
	125	})
	126
7b2c3f47	127	res = decode_base_n(license_response['k'], table=self.STRTABLE)
3e9b66d7 LNO	128	encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
	129
	130	h = hmac.new(
f8271158	131	binascii.unhexlify(self.HKEY),
3e9b66d7 LNO	132	(license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'),
	133	digestmod=hashlib.sha256)
	134	enckey = bytes_to_intlist(h.digest())
	135
	136	return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
	137
	138	def abematv_license_open(self, url):
	139	url = request_to_url(url)
14f25df2	140	ticket = urllib.parse.urlparse(url).netloc
3e9b66d7	141	response_data = self._get_videokey_from_ticket(ticket)
f9934b96	142	return urllib.response.addinfourl(io.BytesIO(response_data), headers={
3e9b66d7 LNO	143	'Content-Length': len(response_data),
	144	}, url=url, code=200)
	145
	146
	147	class AbemaTVBaseIE(InfoExtractor):
	148	def _extract_breadcrumb_list(self, webpage, video_id):
	149	for jld in re.finditer(
	150	r'(?is)</span></li></ul><script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
	151	webpage):
	152	jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
	153	if jsonld:
	154	if jsonld.get('@type') != 'BreadcrumbList':
	155	continue
	156	trav = traverse_obj(jsonld, ('itemListElement', ..., 'name'))
	157	if trav:
	158	return trav
	159	return []
	160
	161
	162	class AbemaTVIE(AbemaTVBaseIE):
	163	_VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air\|video/episode\|channels/.+?/slots)/(?P<id>[^?/]+)'
	164	_NETRC_MACHINE = 'abematv'
	165	_TESTS = [{
	166	'url': 'https://abema.tv/video/episode/194-25_s2_p1',
	167	'info_dict': {
	168	'id': '194-25_s2_p1',
	169	'title': '第1話「チーズケーキ」　「モーニング再び」',
	170	'series': '異世界食堂２',
	171	'series_number': 2,
	172	'episode': '第1話「チーズケーキ」　「モーニング再び」',
	173	'episode_number': 1,
	174	},
	175	'skip': 'expired',
	176	}, {
	177	'url': 'https://abema.tv/channels/anime-live2/slots/E8tvAnMJ7a9a5d',
	178	'info_dict': {
	179	'id': 'E8tvAnMJ7a9a5d',
	180	'title': 'ゆるキャン△ SEASON２全話一挙【無料ビデオ72時間】',
	181	'series': 'ゆるキャン△ SEASON２',
	182	'episode': 'ゆるキャン△ SEASON２全話一挙【無料ビデオ72時間】',
	183	'series_number': 2,
	184	'episode_number': 1,
	185	'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
	186	},
	187	'skip': 'expired',
	188	}, {
	189	'url': 'https://abema.tv/video/episode/87-877_s1282_p31047',
	190	'info_dict': {
	191	'id': 'E8tvAnMJ7a9a5d',
	192	'title': '第5話『光射す』',
	193	'description': 'md5:56d4fc1b4f7769ded5f923c55bb4695d',
	194	'thumbnail': r're:https://hayabusa\.io/.+',
	195	'series': '相棒',
	196	'episode': '第5話『光射す』',
	197	},
	198	'skip': 'expired',
	199	}, {
	200	'url': 'https://abema.tv/now-on-air/abema-anime',
	201	'info_dict': {
	202	'id': 'abema-anime',
	203	# this varies
	204	# 'title': '女子高生の無駄づかい全話一挙【無料ビデオ72時間】',
	205	'description': 'md5:55f2e61f46a17e9230802d7bcc913d5f',
	206	'is_live': True,
207	},
208	'skip': 'Not supported until yt-dlp implements native live downloader OR AbemaTV can start a local HTTP server',
209	}]
210	_USERTOKEN = None
211	_DEVICE_ID = None
212	_TIMETABLE = None
213	_MEDIATOKEN = None
214
215	_SECRETKEY = b'v+Gjs=25Aw5erR!J8ZuvRrCx*rGswhB&qdHd_SYerEWdU&a?3DzN9BRbp5KwY4hEmcj5#fykMjJ=AuWz5GSMY-d@H7DMEh3M@9n2G552Us$$k9cD=3TxwWe86!x#Zyhe'
216
217	def _generate_aks(self, deviceid):
218	deviceid = deviceid.encode('utf-8')
219	# add 1 hour and then drop minute and secs
220	ts_1hour = int((time_seconds(hours=9) // 3600 + 1) * 3600)
221	time_struct = time.gmtime(ts_1hour)
222	ts_1hour_str = str(ts_1hour).encode('utf-8')
223
224	tmp = None
225
226	def mix_once(nonce):
227	nonlocal tmp
228	h = hmac.new(self._SECRETKEY, digestmod=hashlib.sha256)
229	h.update(nonce)
230	tmp = h.digest()
231
232	def mix_tmp(count):
233	nonlocal tmp
234	for i in range(count):
235	mix_once(tmp)
236
237	def mix_twist(nonce):
238	nonlocal tmp
f8271158	239	mix_once(base64.urlsafe_b64encode(tmp).rstrip(b'=') + nonce)
3e9b66d7 LNO	240
	241	mix_once(self._SECRETKEY)
	242	mix_tmp(time_struct.tm_mon)
	243	mix_twist(deviceid)
	244	mix_tmp(time_struct.tm_mday % 5)
	245	mix_twist(ts_1hour_str)
	246	mix_tmp(time_struct.tm_hour % 5)
	247
f8271158	248	return base64.urlsafe_b64encode(tmp).rstrip(b'=').decode('utf-8')
3e9b66d7 LNO	249
	250	def _get_device_token(self):
	251	if self._USERTOKEN:
	252	return self._USERTOKEN
	253
f9934b96	254	self._DEVICE_ID = str(uuid.uuid4())
3e9b66d7 LNO	255	aks = self._generate_aks(self._DEVICE_ID)
	256	user_data = self._download_json(
	257	'https://api.abema.io/v1/users', None, note='Authorizing',
	258	data=json.dumps({
	259	'deviceId': self._DEVICE_ID,
	260	'applicationKeySecret': aks,
	261	}).encode('utf-8'),
	262	headers={
	263	'Content-Type': 'application/json',
	264	})
	265	self._USERTOKEN = user_data['token']
	266
	267	# don't allow adding it 2 times or more, though it's guarded
	268	remove_opener(self._downloader, AbemaLicenseHandler)
	269	add_opener(self._downloader, AbemaLicenseHandler(self))
	270
	271	return self._USERTOKEN
	272
	273	def _get_media_token(self, invalidate=False, to_show=True):
	274	if not invalidate and self._MEDIATOKEN:
	275	return self._MEDIATOKEN
	276
	277	self._MEDIATOKEN = self._download_json(
	278	'https://api.abema.io/v1/media/token', None, note='Fetching media token' if to_show else False,
	279	query={
	280	'osName': 'android',
	281	'osVersion': '6.0.1',
	282	'osLang': 'ja_JP',
	283	'osTimezone': 'Asia/Tokyo',
	284	'appId': 'tv.abema',
	285	'appVersion': '3.27.1'
	286	}, headers={
	287	'Authorization': 'bearer ' + self._get_device_token()
	288	})['token']
	289
	290	return self._MEDIATOKEN
	291
52efa4b3	292	def _perform_login(self, username, password):
3e9b66d7 LNO	293	if '@' in username: # don't strictly check if it's email address or not
	294	ep, method = 'user/email', 'email'
	295	else:
	296	ep, method = 'oneTimePassword', 'userId'
	297
	298	login_response = self._download_json(
	299	f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
	300	data=json.dumps({
	301	method: username,
	302	'password': password
	303	}).encode('utf-8'), headers={
	304	'Authorization': 'bearer ' + self._get_device_token(),
	305	'Origin': 'https://abema.tv',
	306	'Referer': 'https://abema.tv/',
	307	'Content-Type': 'application/json',
	308	})
	309
	310	self._USERTOKEN = login_response['token']
	311	self._get_media_token(True)
	312
	313	def _real_extract(self, url):
	314	# starting download using infojson from this extractor is undefined behavior,
	315	# and never be fixed in the future; you must trigger downloads by directly specifing URL.
	316	# (unless there's a way to hook before downloading by extractor)
	317	video_id, video_type = self._match_valid_url(url).group('id', 'type')
	318	headers = {
	319	'Authorization': 'Bearer ' + self._get_device_token(),
	320	}
	321	video_type = video_type.split('/')[-1]
	322
	323	webpage = self._download_webpage(url, video_id)
	324	canonical_url = self._search_regex(
	325	r'<link\s+rel="canonical"\s*href="(.+?)"', webpage, 'canonical URL',
	326	default=url)
	327	info = self._search_json_ld(webpage, video_id, default={})
	328
	329	title = self._search_regex(
	330	r'<span\s*class=".+?EpisodeTitleBlock__title">(.+?)</span>', webpage, 'title', default=None)
	331	if not title:
	332	jsonld = None
	333	for jld in re.finditer(
	334	r'(?is)<span\sclass="com-m-Thumbnail__image">(?:</span>)?<script[^>]+type=(["\']?)application/ld\+json\1[^>]>(?P<json_ld>.+?)</script>',
	335	webpage):
	336	jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
	337	if jsonld:
	338	break
	339	if jsonld:
	340	title = jsonld.get('caption')
	341	if not title and video_type == 'now-on-air':
	342	if not self._TIMETABLE:
	343	# cache the timetable because it goes to 5MiB in size (!!)
	344	self._TIMETABLE = self._download_json(
	345	'https://api.abema.io/v1/timetable/dataSet?debug=false', video_id,
	346	headers=headers)
	347	now = time_seconds(hours=9)
	348	for slot in self._TIMETABLE.get('slots', []):
	349	if slot.get('channelId') != video_id:
	350	continue
	351	if slot['startAt'] <= now and now < slot['endAt']:
	352	title = slot['title']
	353	break
	354
	355	# read breadcrumb on top of page
	356	breadcrumb = self._extract_breadcrumb_list(webpage, video_id)
357	if breadcrumb:
358	# breadcrumb list translates to: (example is 1st test for this IE)
359	# Home > Anime (genre) > Isekai Shokudo 2 (series name) > Episode 1 "Cheese cakes" "Morning again" (episode title)
360	# hence this works
361	info['series'] = breadcrumb[-2]
362	info['episode'] = breadcrumb[-1]
363	if not title:
364	title = info['episode']
365
366	description = self._html_search_regex(
367	(r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
368	r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',),
369	webpage, 'description', default=None, group=1)
370	if not description:
371	og_desc = self._html_search_meta(
372	('description', 'og:description', 'twitter:description'), webpage)
373	if og_desc:
374	description = re.sub(r'''(?sx)
375	^(.+?)(?:
376	アニメの動画を無料で見るならABEMA！\| # anime
377	等、.+ # applies for most of categories
378	)?
379	''', r'\1', og_desc)
380
381	# canonical URL may contain series and episode number
382	mobj = re.search(r's(\d+)_p(\d+)$', canonical_url)
383	if mobj:
384	seri = int_or_none(mobj.group(1), default=float('inf'))
385	epis = int_or_none(mobj.group(2), default=float('inf'))
386	info['series_number'] = seri if seri < 100 else None
387	# some anime like Detective Conan (though not available in AbemaTV)
388	# has more than 1000 episodes (1026 as of 2021/11/15)
389	info['episode_number'] = epis if epis < 2000 else None
390
391	is_live, m3u8_url = False, None
392	if video_type == 'now-on-air':
393	is_live = True
394	channel_url = 'https://api.abema.io/v1/channels'
395	if video_id == 'news-global':
396	channel_url = update_url_query(channel_url, {'division': '1'})
397	onair_channels = self._download_json(channel_url, video_id)
398	for ch in onair_channels['channels']:
399	if video_id == ch['id']:
400	m3u8_url = ch['playback']['hls']
401	break
402	else:
403	raise ExtractorError(f'Cannot find on-air {video_id} channel.', expected=True)
404	elif video_type == 'episode':
405	api_response = self._download_json(
406	f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
407	note='Checking playability',
408	headers=headers)
409	ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'), default=[])
410	if 3 not in ondemand_types:
411	# cannot acquire decryption key for these streams
412	self.report_warning('This is a premium-only stream')
413
414	m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
415	elif video_type == 'slots':
416	api_response = self._download_json(
417	f'https://api.abema.io/v1/media/slots/{video_id}', video_id,
418	note='Checking playability',
419	headers=headers)
420	if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
421	self.report_warning('This is a premium-only stream')
422
423	m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
424	else:
425	raise ExtractorError('Unreachable')
426
427	if is_live:
428	self.report_warning("This is a livestream; yt-dlp doesn't support downloading natively, but FFmpeg cannot handle m3u8 manifests from AbemaTV")
429	self.report_warning('Please consider using Streamlink to download these streams (https://github.com/streamlink/streamlink)')
430	formats = self._extract_m3u8_formats(
431	m3u8_url, video_id, ext='mp4', live=is_live)
432
433	info.update({
434	'id': video_id,
435	'title': title,
436	'description': description,
437	'formats': formats,
438	'is_live': is_live,
439	})
440	return info
441
442
443	class AbemaTVTitleIE(AbemaTVBaseIE):
444	_VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'
445
446	_TESTS = [{
447	'url': 'https://abema.tv/video/title/90-1597',
448	'info_dict': {
449	'id': '90-1597',
450	'title': 'シャッフルアイランド',
451	},
452	'playlist_mincount': 2,
453	}, {
454	'url': 'https://abema.tv/video/title/193-132',
455	'info_dict': {
456	'id': '193-132',
457	'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
458	},
459	'playlist_mincount': 16,
460	}]
461
462	def _real_extract(self, url):
463	video_id = self._match_id(url)
464	webpage = self._download_webpage(url, video_id)
465
466	playlist_title, breadcrumb = None, self._extract_breadcrumb_list(webpage, video_id)
467	if breadcrumb:
468	playlist_title = breadcrumb[-1]
469
470	playlist = [
471	self.url_result(urljoin('https://abema.tv/', mobj.group(1)))
472	for mobj in re.finditer(r'<li\sclass=".+?EpisodeList.+?"><a\shref="(/[^"]+?)"', webpage)]
473
474	return self.playlist_result(playlist, playlist_title=playlist_title, playlist_id=video_id)