]>
Commit | Line | Data |
---|---|---|
3459d3c5 L |
1 | import base64 |
2 | import re | |
3 | import json | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
7 | float_or_none, | |
8 | js_to_json, | |
9 | remove_start, | |
10 | ) | |
11 | ||
12 | ||
class JStreamIE(InfoExtractor):
    """Extractor for ``jstream:<host>:<publisher>:<mid>`` pseudo-URLs.

    These URLs are produced by ``_extract_embed_urls`` from pages that load
    the ``if.js`` player script from ``ssl-cache.stream.ne.jp``; the metadata
    and HLS manifests are then requested from
    ``<publisher>.eq.webcdn.stream.ne.jp``.
    """

    # group "id" only exists for compliance, not directly used in requests
    # also all components are mandatory
    _VALID_URL = r'jstream:(?P<host>www\d+):(?P<id>(?P<publisher>[a-z0-9]+):(?P<mid>\d+))'

    _TESTS = [{
        'url': 'jstream:www50:eqd638pvwx:752',
        'info_dict': {
            'id': 'eqd638pvwx:752',
            'ext': 'mp4',
            'title': '阪神淡路大震災 激震の記録2020年版 解説動画',
            'duration': 672,
            'thumbnail': r're:https?://eqd638pvwx\.eq\.webcdn\.stream\.ne\.jp/.+\.jpg',
        },
    }]

    def _parse_jsonp(self, callback, string, video_id):
        """Return the JSON payload wrapped in a ``callback(...)`` JSONP call.

        The search is delegated to ``self._search_json``, anchored on the
        (regex-escaped) callback name followed by an opening parenthesis.
        """
        return self._search_json(rf'\s*{re.escape(callback)}\s*\(', string, callback, video_id)

    def _find_formats(self, video_id, movie_list_hls, host, publisher, subtitles):
        """Yield formats for every ``auto*`` entry of ``movie_list_hls``.

        ``movie_list_hls`` is an iterable of dicts carrying ``text`` and
        ``url`` keys; ``None`` (key absent from the metadata) is treated as
        an empty list. Subtitles found along the way are merged in place
        into the caller-supplied ``subtitles`` dict.
        """
        # The metadata may lack "movie_list_hls" entirely; do not crash on None
        for value in movie_list_hls or ():
            text = value.get('text') or ''
            if not text.startswith('auto'):
                continue
            # Without a path there is nothing to request; the original code
            # would have fetched ".../jmc_pub/None" here
            path = value.get('url')
            if not path:
                continue
            # "auto" -> None, "auto_xyz" -> "xyz"
            m3u8_id = remove_start(remove_start(text, 'auto'), '_') or None
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                f'https://{publisher}.eq.webcdn.stream.ne.jp/{host}/{publisher}/jmc_pub/{path}', video_id, 'mp4', m3u8_id=m3u8_id)
            self._merge_subtitles(subs, target=subtitles)
            yield from fmts

    def _real_extract(self, url):
        """Fetch the ``eq_meta`` JSONP metadata and build the info dict."""
        host, publisher, mid, video_id = self._match_valid_url(url).group('host', 'publisher', 'mid', 'id')
        video_info_jsonp = self._download_webpage(
            f'https://{publisher}.eq.webcdn.stream.ne.jp/{host}/{publisher}/jmc_pub/eq_meta/v1/{mid}.jsonp',
            video_id, 'Requesting video info')
        video_info = self._parse_jsonp('metaDataResult', video_info_jsonp, video_id)['movie']
        # Filled in place by _find_formats while the generator is consumed
        subtitles = {}
        formats = list(self._find_formats(video_id, video_info.get('movie_list_hls'), host, publisher, subtitles))
        self._remove_duplicate_formats(formats)
        return {
            'id': video_id,
            'title': video_info.get('title'),
            'duration': float_or_none(video_info.get('duration')),
            'thumbnail': video_info.get('thumbnail_url'),
            'formats': formats,
            'subtitles': subtitles,
        }

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield ``jstream:`` URLs for players embedded in ``webpage``.

        Nothing is yielded unless the page loads the ``if.js`` player script,
        from whose URL the host and publisher are taken. Each
        ``PlayerFactoryIF.create({...})`` call then contributes one URL whose
        media id is the base64-decoded ``m`` option; calls whose options
        cannot be parsed, or that lack a decodable ``m``, are skipped.
        """
        # check for eligibility of webpage
        # https://support.eq.stream.co.jp/hc/ja/articles/115008388147-%E3%83%97%E3%83%AC%E3%82%A4%E3%83%A4%E3%83%BCAPI%E3%81%AE%E3%82%B5%E3%83%B3%E3%83%97%E3%83%AB%E3%82%B3%E3%83%BC%E3%83%89
        script_tag = re.search(r'<script\s*[^>]+?src="https://ssl-cache\.stream\.ne\.jp/(?P<host>www\d+)/(?P<publisher>[a-z0-9]+)/[^"]+?/if\.js"', webpage)
        if not script_tag:
            return
        host, publisher = script_tag.groups()
        for m in re.finditer(r'(?s)PlayerFactoryIF\.create\(\s*({[^\}]+?})\s*\)\s*;', webpage):
            # TODO: using json.loads here as InfoExtractor._parse_json is not classmethod
            try:
                info = json.loads(js_to_json(m.group(1)))
                mid = base64.b64decode(info['m']).decode()
            except (KeyError, TypeError, ValueError):
                # KeyError/TypeError: "m" missing or not a string;
                # ValueError: invalid JSON, invalid base64 or non-UTF-8 payload.
                # One broken embed must not abort the scan of the whole page.
                continue
            yield f'jstream:{host}:{publisher}:{mid}'