]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/funimation.py
[thumbnailsconvertor] Treat `jpeg` as `jpg`
[yt-dlp.git] / yt_dlp / extractor / funimation.py
CommitLineData
ab4bdc91
MS
1# coding: utf-8
2from __future__ import unicode_literals
f542a3d2 3
929ba399
RA
4import random
5import string
6
ab4bdc91 7from .common import InfoExtractor
804181dd 8from ..compat import compat_HTTPError
ab4bdc91 9from ..utils import (
f542a3d2 10 determine_ext,
f377f44d 11 int_or_none,
91399b2f 12 js_to_json,
29f63c96
M
13 urlencode_postdata,
14 urljoin,
ab4bdc91 15 ExtractorError,
ab4bdc91 16)
ab4bdc91 17
b4c299ba 18
class FunimationIE(InfoExtractor):
    """Extractor for episode pages on funimation.com / funimationnow.uk.

    The episode web page is scraped for the numeric player id and basic
    metadata; the playable sources are then fetched from the
    ``showexperience`` API and subtitles from the embedded player page.
    When credentials are available, a login token is obtained once and sent
    with the sources request.
    """

    _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?:[^/]+/)?shows/[^/]+/(?P<id>[^/?#&]+)'

    _NETRC_MACHINE = 'funimation'
    # API token obtained by _login(); stays None for anonymous sessions
    _TOKEN = None

    _TESTS = [{
        'url': 'https://www.funimation.com/shows/hacksign/role-play/',
        'info_dict': {
            'id': '91144',
            'display_id': 'role-play',
            'ext': 'mp4',
            'title': '.hack//SIGN - Role Play',
            'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd',
            'thumbnail': r're:https?://.*\.jpg',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://www.funimation.com/shows/attack-on-titan-junior-high/broadcast-dub-preview/',
        'info_dict': {
            'id': '210051',
            'display_id': 'broadcast-dub-preview',
            'ext': 'mp4',
            'title': 'Attack on Titan: Junior High - Broadcast Dub Preview',
            'thumbnail': r're:https?://.*\.(?:jpg|png)',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
        'only_matching': True,
    }, {
        # with lang code
        'url': 'https://www.funimation.com/en/shows/hacksign/role-play/',
        'only_matching': True,
    }]

    def _login(self):
        """Log in with the configured credentials and remember the API token.

        Does nothing when no username is configured. A 401 response is turned
        into an expected ExtractorError carrying the server's error message;
        any other failure is re-raised unchanged.
        """
        username, password = self._get_login_info()
        if username is None:
            return
        try:
            data = self._download_json(
                'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/',
                None, 'Logging in', data=urlencode_postdata({
                    'username': username,
                    'password': password,
                }))
            self._TOKEN = data['token']
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                # The 401 body is a JSON document with a human readable 'error'
                error = self._parse_json(e.cause.read().decode('utf-8'), None)['error']
                raise ExtractorError(error, expected=True)
            raise

    def _real_initialize(self):
        """Perform the (optional) login before any extraction takes place."""
        self._login()

    def _real_extract(self, url):
        """Build the info dict for the episode page at ``url``.

        Raises ExtractorError (expected) when the sources API answers 403,
        using the error detail/title supplied by the server.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        def _search_kane(name):
            # Look up a single KANE_customdimensions.<name> = '...' assignment
            return self._search_regex(
                r"KANE_customdimensions\.%s\s*=\s*'([^']+)';" % name,
                webpage, name, default=None)

        title_data = self._parse_json(self._search_regex(
            r'TITLE_DATA\s*=\s*({[^}]+})',
            webpage, 'title data', default=''),
            display_id, js_to_json, fatal=False) or {}

        video_id = title_data.get('id') or self._search_regex([
            r"KANE_customdimensions\.videoID\s*=\s*'(\d+)';",
            r'<iframe[^>]+src="/player/(\d+)',
        ], webpage, 'video_id', default=None)
        if not video_id:
            # Last resort: take the id from the player URL in the meta tags
            player_url = self._html_search_meta([
                'al:web:url',
                'og:video:url',
                'og:video:secure_url',
            ], webpage, fatal=True)
            video_id = self._search_regex(r'/player/(\d+)', player_url, 'video id')

        title = episode = title_data.get('title') or _search_kane('videoTitle') or self._og_search_title(webpage)
        series = _search_kane('showName')
        if series:
            title = '%s - %s' % (series, title)
        # The description is optional metadata: its absence must not abort
        # an otherwise successful extraction.
        description = self._html_search_meta(['description', 'og:description'], webpage)
        subtitles = self.extract_subtitles(url, video_id, display_id)

        try:
            headers = {}
            if self._TOKEN:
                headers['Authorization'] = 'Token %s' % self._TOKEN
            sources = self._download_json(
                'https://www.funimation.com/api/showexperience/%s/' % video_id,
                video_id, headers=headers, query={
                    # random 8 character alphanumeric value sent as pinst_id
                    'pinst_id': ''.join([random.choice(string.digits + string.ascii_letters) for _ in range(8)]),
                })['items']
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                # Decode explicitly, mirroring the 401 handling in _login()
                error = self._parse_json(e.cause.read().decode('utf-8'), video_id)['errors'][0]
                raise ExtractorError('%s said: %s' % (
                    self.IE_NAME, error.get('detail') or error.get('title')), expected=True)
            raise

        formats = []
        for source in sources:
            source_url = source.get('src')
            if not source_url:
                continue
            source_type = source.get('videoType') or determine_ext(source_url)
            if source_type == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    source_url, video_id, 'mp4',
                    m3u8_id='hls', fatal=False))
            else:
                formats.append({
                    'format_id': source_type,
                    'url': source_url,
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': self._og_search_thumbnail(webpage),
            'series': series,
            'season_number': int_or_none(title_data.get('seasonNum') or _search_kane('season')),
            'episode_number': int_or_none(title_data.get('episodeNum')),
            'episode': episode,
            'subtitles': subtitles,
            'season_id': title_data.get('seriesId'),
            'formats': formats,
        }

    def _get_subtitles(self, url, video_id, display_id):
        """Collect subtitle tracks from the player page of ``video_id``.

        Returns a dict mapping a language key to a list of ``{'url': ...}``
        entries. Tracks of type 'CC' get a '_CC' suffix on their language
        key. Tracks lacking a source URL or a language are skipped.
        """
        # %-formatting keeps this working even if the id is not a str
        player_url = urljoin(url, '/player/%s' % video_id)
        player_page = self._download_webpage(player_url, display_id)
        text_tracks_json_string = self._search_regex(
            r'"textTracks": (\[{.+?}\])',
            player_page, 'subtitles data', default='')
        text_tracks = self._parse_json(
            text_tracks_json_string, display_id, js_to_json, fatal=False) or []
        subtitles = {}
        for text_track in text_tracks:
            sub_url = text_track.get('src')
            language = text_track.get('language')
            # An incomplete track is unusable; appending '_CC' to a missing
            # language would also raise a TypeError.
            if not sub_url or not language:
                continue
            if text_track.get('type') == 'CC':
                language += '_CC'
            subtitles.setdefault(language, []).append({'url': sub_url})
        return subtitles