yt_dlp/extractor/funimation.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import random
   5 import re
   6 import string
   7
   8 from .common import InfoExtractor
   9 from ..compat import compat_HTTPError
  10 from ..utils import (
  11     determine_ext,
  12     dict_get,
  13     int_or_none,
  14     js_to_json,
  15     urlencode_postdata,
  16     urljoin,
  17     ExtractorError,
  18 )
  19
  20
  21 class FunimationIE(InfoExtractor):
  22     _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?:[^/]+/)?shows/[^/]+/(?P<id>[^/?#&]+)'
  23
  24     _NETRC_MACHINE = 'funimation'
  25     _TOKEN = None
  26
  27     _TESTS = [{
  28         'url': 'https://www.funimation.com/shows/hacksign/role-play/',
  29         'info_dict': {
  30             'id': '91144',
  31             'display_id': 'role-play',
  32             'ext': 'mp4',
  33             'title': '.hack//SIGN - Role Play',
  34             'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd',
  35             'thumbnail': r're:https?://.*\.jpg',
  36         },
  37         'params': {
  38             # m3u8 download
  39             'skip_download': True,
  40         },
  41     }, {
  42         'url': 'https://www.funimation.com/shows/attack-on-titan-junior-high/broadcast-dub-preview/',
  43         'info_dict': {
  44             'id': '210051',
  45             'display_id': 'broadcast-dub-preview',
  46             'ext': 'mp4',
  47             'title': 'Attack on Titan: Junior High - Broadcast Dub Preview',
  48             'thumbnail': r're:https?://.*\.(?:jpg|png)',
  49         },
  50         'params': {
  51             # m3u8 download
  52             'skip_download': True,
  53         },
  54     }, {
  55         'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
  56         'only_matching': True,
  57     }, {
  58         # with lang code
  59         'url': 'https://www.funimation.com/en/shows/hacksign/role-play/',
  60         'only_matching': True,
  61     }]
  62
  63     def _login(self):
  64         username, password = self._get_login_info()
  65         if username is None:
  66             return
  67         try:
  68             data = self._download_json(
  69                 'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/',
  70                 None, 'Logging in', data=urlencode_postdata({
  71                     'username': username,
  72                     'password': password,
  73                 }))
  74             self._TOKEN = data['token']
  75         except ExtractorError as e:
  76             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
  77                 error = self._parse_json(e.cause.read().decode(), None)['error']
  78                 raise ExtractorError(error, expected=True)
  79             raise
  80
  81     def _real_initialize(self):
  82         self._login()
  83
  84     def _real_extract(self, url):
  85         display_id = self._match_id(url)
  86         webpage = self._download_webpage(url, display_id)
  87
  88         def _search_kane(name):
  89             return self._search_regex(
  90                 r"KANE_customdimensions\.%s\s*=\s*'([^']+)';" % name,
  91                 webpage, name, default=None)
  92
  93         title_data = self._parse_json(self._search_regex(
  94             r'TITLE_DATA\s*=\s*({[^}]+})',
  95             webpage, 'title data', default=''),
  96             display_id, js_to_json, fatal=False) or {}
  97
  98         video_id = title_data.get('id') or self._search_regex([
  99             r"KANE_customdimensions.videoID\s*=\s*'(\d+)';",
 100             r'<iframe[^>]+src="/player/(\d+)',
 101         ], webpage, 'video_id', default=None)
 102         if not video_id:
 103             player_url = self._html_search_meta([
 104                 'al:web:url',
 105                 'og:video:url',
 106                 'og:video:secure_url',
 107             ], webpage, fatal=True)
 108             video_id = self._search_regex(r'/player/(\d+)', player_url, 'video id')
 109
 110         title = episode = title_data.get('title') or _search_kane('videoTitle') or self._og_search_title(webpage)
 111         series = _search_kane('showName')
 112         if series:
 113             title = '%s - %s' % (series, title)
 114         description = self._html_search_meta(['description', 'og:description'], webpage, fatal=True)
 115         subtitles = self.extract_subtitles(url, video_id, display_id)
 116
 117         try:
 118             headers = {}
 119             if self._TOKEN:
 120                 headers['Authorization'] = 'Token %s' % self._TOKEN
 121             sources = self._download_json(
 122                 'https://www.funimation.com/api/showexperience/%s/' % video_id,
 123                 video_id, headers=headers, query={
 124                     'pinst_id': ''.join([random.choice(string.digits + string.ascii_letters) for _ in range(8)]),
 125                 })['items']
 126         except ExtractorError as e:
 127             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
 128                 error = self._parse_json(e.cause.read(), video_id)['errors'][0]
 129                 raise ExtractorError('%s said: %s' % (
 130                     self.IE_NAME, error.get('detail') or error.get('title')), expected=True)
 131             raise
 132
 133         formats = []
 134         for source in sources:
 135             source_url = source.get('src')
 136             if not source_url:
 137                 continue
 138             source_type = source.get('videoType') or determine_ext(source_url)
 139             if source_type == 'm3u8':
 140                 formats.extend(self._extract_m3u8_formats(
 141                     source_url, video_id, 'mp4',
 142                     m3u8_id='hls', fatal=False))
 143             else:
 144                 formats.append({
 145                     'format_id': source_type,
 146                     'url': source_url,
 147                 })
 148         self._sort_formats(formats)
 149
 150         return {
 151             'id': video_id,
 152             'display_id': display_id,
 153             'title': title,
 154             'description': description,
 155             'thumbnail': self._og_search_thumbnail(webpage),
 156             'series': series,
 157             'season_number': int_or_none(title_data.get('seasonNum') or _search_kane('season')),
 158             'episode_number': int_or_none(title_data.get('episodeNum')),
 159             'episode': episode,
 160             'subtitles': subtitles,
 161             'season_id': title_data.get('seriesId'),
 162             'formats': formats,
 163         }
 164
 165     def _get_subtitles(self, url, video_id, display_id):
 166         player_url = urljoin(url, '/player/' + video_id)
 167         player_page = self._download_webpage(player_url, display_id)
 168         text_tracks_json_string = self._search_regex(
 169             r'"textTracks": (\[{.+?}\])',
 170             player_page, 'subtitles data', default='')
 171         text_tracks = self._parse_json(
 172             text_tracks_json_string, display_id, js_to_json, fatal=False) or []
 173         subtitles = {}
 174         for text_track in text_tracks:
 175             url_element = {'url': text_track.get('src')}
 176             language = text_track.get('language')
 177             if text_track.get('type') == 'CC':
 178                 language += '_CC'
 179             subtitles.setdefault(language, []).append(url_element)
 180         return subtitles
 181
 182
 183 class FunimationShowIE(FunimationIE):
 184     IE_NAME = 'funimation:show'
 185     _VALID_URL = r'(?P<url>https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?P<locale>[^/]+)?/?shows/(?P<id>[^/?#&]+))/?(?:[?#]|$)'
 186
 187     _TESTS = [{
 188         'url': 'https://www.funimation.com/en/shows/sk8-the-infinity',
 189         'info_dict': {
 190             'id': 1315000,
 191             'title': 'SK8 the Infinity'
 192         },
 193         'playlist_count': 13,
 194         'params': {
 195             'skip_download': True,
 196         },
 197     }, {
 198         # without lang code
 199         'url': 'https://www.funimation.com/shows/ouran-high-school-host-club/',
 200         'info_dict': {
 201             'id': 39643,
 202             'title': 'Ouran High School Host Club'
 203         },
 204         'playlist_count': 26,
 205         'params': {
 206             'skip_download': True,
 207         },
 208     }]
 209
 210     def _real_extract(self, url):
 211         base_url, locale, display_id = re.match(self._VALID_URL, url).groups()
 212
 213         show_info = self._download_json(
 214             'https://title-api.prd.funimationsvc.com/v2/shows/%s?region=US&deviceType=web&locale=%s'
 215             % (display_id, locale or 'en'), display_id)
 216         items = self._download_json(
 217             'https://prod-api-funimationnow.dadcdigital.com/api/funimation/episodes/?limit=99999&title_id=%s'
 218             % show_info.get('id'), display_id).get('items')
 219         vod_items = map(lambda k: dict_get(k, ('mostRecentSvod', 'mostRecentAvod')).get('item'), items)
 220
 221         return {
 222             '_type': 'playlist',
 223             'id': show_info['id'],
 224             'title': show_info['name'],
 225             'entries': [
 226                 self.url_result(
 227                     '%s/%s' % (base_url, vod_item.get('episodeSlug')), FunimationIE.ie_key(),
 228                     vod_item.get('episodeId'), vod_item.get('episodeName'))
 229                 for vod_item in sorted(vod_items, key=lambda x: x.get('episodeOrder'))],
 230         }