]>
Commit | Line | Data |
---|---|---|
876b70c8 | 1 | from .common import InfoExtractor |
2 | from ..networking.exceptions import HTTPError | |
3 | from ..utils import ( | |
4 | ExtractorError, | |
5 | clean_html, | |
6 | get_element_text_and_html_by_tag, | |
7 | int_or_none, | |
8 | str_or_none, | |
9 | traverse_obj, | |
10 | try_call, | |
11 | unified_timestamp, | |
12 | urljoin, | |
13 | ) | |
14 | ||
15 | ||
class TBSJPEpisodeIE(InfoExtractor):
    # Handles single-episode pages under cu.tbs.co.jp/episode/<id>.
    _VALID_URL = r'https?://cu\.tbs\.co\.jp/episode/(?P<id>[\d_]+)'
    _GEO_BYPASS = False
    _TESTS = [{
        'url': 'https://cu.tbs.co.jp/episode/23613_2044134_1000049010',
        'skip': 'streams geo-restricted, Japan only. Also, will likely expire eventually',
        'info_dict': {
            'title': 'VIVANT 第三話 誤送金完結へ!絶体絶命の反撃開始',
            'id': '23613_2044134_1000049010',
            'ext': 'mp4',
            'upload_date': '20230728',
            'duration': 3517,
            'release_timestamp': 1691118230,
            'episode': '第三話 誤送金完結へ!絶体絶命の反撃開始',
            'release_date': '20230804',
            'categories': 'count:11',
            'episode_number': 3,
            'timestamp': 1690522538,
            'description': 'md5:2b796341af1ef772034133174ba4a895',
            'series': 'VIVANT',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The embedded `window.app` JSON is optional (fatal=False); when it is
        # missing, the episode metadata lookups below simply yield nothing.
        app_state = self._search_json(
            r'window\.app\s*=', webpage, 'episode info', video_id, fatal=False)
        episode_info = traverse_obj(
            app_state, ('falcorCache', 'catalog', 'episode', video_id, 'value'))

        # The playback endpoint and API key are read out of a script asset
        # referenced by the episode page.
        config_path = self._search_regex(
            r'<script[^>]+src=["\'](/assets/tf\.[^"\']+\.js)["\']', webpage, 'stream API config')
        config_js = self._download_webpage(
            urljoin(url, config_path), video_id, note='Downloading stream API config')
        playback_base = self._search_regex(
            r'videoPlaybackUrl:\s*[\'"]([^\'"]+)[\'"]', config_js, 'stream API url')
        streaks_key = self._search_regex(
            r'api_key:\s*[\'"]([^\'"]+)[\'"]', config_js, 'stream API key')

        try:
            stream_info = self._download_json(
                f'{playback_base}ref:{video_id}', video_id,
                headers={'X-Streaks-Api-Key': streaks_key},
                note='Downloading stream metadata')
        except ExtractorError as error:
            # An HTTP 403 from the stream metadata request is reported as a
            # Japan-only geo restriction; any other failure is re-raised as is.
            blocked = isinstance(error.cause, HTTPError) and error.cause.status == 403
            if blocked:
                self.raise_geo_restricted(countries=['JP'])
            raise

        formats = []
        subtitles = {}
        for manifest_url in traverse_obj(stream_info, ('sources', ..., 'src')):
            new_formats, new_subtitles = self._extract_m3u8_formats_and_subtitles(
                manifest_url, video_id, fatal=False)
            formats.extend(new_formats)
            self._merge_subtitles(new_subtitles, target=subtitles)

        # Fallback title: text of the first <h3> element found in the page.
        page_title = try_call(
            lambda: clean_html(get_element_text_and_html_by_tag('h3', webpage)[0]))
        episode_fields = traverse_obj(episode_info, {
            'categories': ('keywords', {list}),
            'id': ('content_id', {str}),
            'description': ('description', 0, 'value'),
            'timestamp': ('created_at', {unified_timestamp}),
            'release_timestamp': ('pub_date', {unified_timestamp}),
            'duration': ('tv_episode_info', 'duration', {int_or_none}),
            'episode_number': ('tv_episode_info', 'episode_number', {int_or_none}),
            'episode': ('title', lambda _, v: not v.get('is_phonetic'), 'value'),
            'series': ('custom_data', 'program_name'),
        }, get_all=False)

        # Keys coming from the episode metadata (e.g. 'id') take precedence
        # over the values listed before the unpacking.
        return {
            'title': page_title,
            'id': video_id,
            **episode_fields,
            'formats': formats,
            'subtitles': subtitles,
        }
83 | ||
84 | ||
class TBSJPProgramIE(InfoExtractor):
    # Handles programme pages under cu.tbs.co.jp/program/<id> and returns them
    # as a playlist of episode URLs.
    _VALID_URL = r'https?://cu\.tbs\.co\.jp/program/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://cu.tbs.co.jp/program/23601',
        'playlist_mincount': 4,
        'info_dict': {
            'id': '23601',
            'categories': ['エンタメ', 'ミライカプセル', '会社', '働く', 'バラエティ', '動画'],
            'description': '幼少期の夢は大人になって、どう成長したのだろうか?\nそしてその夢は今後、どのように広がっていくのか?\nいま話題の会社で働く人の「夢の成長」を描く',
            'series': 'ミライカプセル -I have a dream-',
            'title': 'ミライカプセル -I have a dream-',
        },
    }]

    def _real_extract(self, url):
        programme_id = self._match_id(url)
        webpage = self._download_webpage(url, programme_id)
        app_state = self._search_json(
            r'window\.app\s*=', webpage, 'programme info', programme_id)

        programme = traverse_obj(
            app_state, ('falcorCache', 'catalog', 'program', programme_id, 'false', 'value'))

        # Each episode code is handed over to the episode extractor by URL.
        episode_codes = traverse_obj(programme, ('custom_data', 'seriesList', 'episodeCode', ...))
        entries = [
            self.url_result(f'https://cu.tbs.co.jp/episode/{video_id}', TBSJPEpisodeIE, video_id)
            for video_id in episode_codes
        ]

        programme_fields = traverse_obj(programme, {
            'categories': ('keywords', ...),
            'id': ('tv_episode_info', 'show_content_id', {str_or_none}),
            'description': ('custom_data', 'program_description'),
            'series': ('custom_data', 'program_name'),
            'title': ('custom_data', 'program_name'),
        })

        # An 'id' found in the programme metadata overrides the URL-derived one.
        return {
            '_type': 'playlist',
            'entries': entries,
            'id': programme_id,
            **programme_fields,
        }
119 | ||
120 | ||
class TBSJPPlaylistIE(InfoExtractor):
    # Handles playlist pages under cu.tbs.co.jp/playlist/<hex id>.
    _VALID_URL = r'https?://cu\.tbs\.co\.jp/playlist/(?P<id>[\da-f]+)'
    _TESTS = [{
        'url': 'https://cu.tbs.co.jp/playlist/184f9970e7ba48e4915f1b252c55015e',
        'playlist_mincount': 4,
        'info_dict': {
            'title': 'まもなく配信終了',
            'id': '184f9970e7ba48e4915f1b252c55015e',
        },
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        app_state = self._search_json(
            r'window\.app\s*=', webpage, 'playlist info', playlist_id)
        playlist = traverse_obj(app_state, ('falcorCache', 'playList', playlist_id))

        def iter_entries():
            # Only catalog items carrying a truthy content_id are considered.
            catalog_items = traverse_obj(
                playlist, ('catalogs', 'value', lambda _, v: v['content_id']))
            for item in catalog_items:
                # TODO: it's likely possible to get all metadata from the playlist page json instead
                content_id = item['content_id']
                content_type = item.get('content_type')

                if content_type == 'tv_show':
                    yield self.url_result(
                        f'https://cu.tbs.co.jp/program/{content_id}', TBSJPProgramIE, content_id)
                    continue

                if content_type == 'tv_episode':
                    yield self.url_result(
                        f'https://cu.tbs.co.jp/episode/{content_id}', TBSJPEpisodeIE, content_id)
                    continue

                # Anything else is skipped with a warning rather than failing.
                self.report_warning(f'Skipping "{content_id}" with unsupported content_type "{content_type}"')

        playlist_title = traverse_obj(playlist, ('display_name', 'value'))
        return self.playlist_result(iter_entries(), playlist_id, playlist_title)