]>
Commit | Line | Data |
---|---|---|
77cc7c6e | 1 | import re |
061d1cd9 | 2 | |
298a120a | 3 | from .common import InfoExtractor |
77cc7c6e | 4 | from ..utils import ( |
81c8b9bd | 5 | ExtractorError, |
54579be4 | 6 | clean_html, |
b8e2a5e0 | 7 | filter_dict, |
54579be4 | 8 | get_element_by_class, |
81c8b9bd | 9 | int_or_none, |
10 | join_nonempty, | |
77cc7c6e | 11 | parse_duration, |
4af9d5c2 | 12 | remove_end, |
77cc7c6e | 13 | traverse_obj, |
4392447d | 14 | try_call, |
77cc7c6e LNO |
15 | unescapeHTML, |
16 | unified_timestamp, | |
81c8b9bd | 17 | url_or_none, |
8f0be90e | 18 | urljoin, |
77cc7c6e | 19 | ) |
298a120a AN |
20 | |
21 | ||
class NhkBaseIE(InfoExtractor):
    # Shared helpers for the NHK World extractors: the episode/program list API,
    # discovery (and caching) of the stream API endpoint, and assembly of the
    # info dict for a single episode.
    _API_URL_TEMPLATE = 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json'
    _BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/'

    def _call_api(self, m_id, lang, is_video, is_episode, is_clip):
        """Query the list API and return its ``data.episodes`` value.

        The three flags select the URL segments of ``_API_URL_TEMPLATE``:
        ``v``/``r`` (video or not), ``clip``/``esd`` and ``episode``/``program``.
        A falsy ``episodes`` value is normalised to an empty list.
        """
        return self._download_json(
            self._API_URL_TEMPLATE % (
                'v' if is_video else 'r',
                'clip' if is_clip else 'esd',
                'episode' if is_episode else 'program',
                m_id, lang, '/all' if is_video else ''),
            m_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'] or []

    def _get_api_info(self, refresh=True):
        """Return the stream API ``url``/``token`` pair.

        With ``refresh=False`` only the cached value is returned (whatever the
        cache holds, possibly nothing). Otherwise the player script is
        downloaded, both values are extracted from it and the cache is updated.
        """
        if not refresh:
            return self.cache.load('nhk', 'api_info')

        # The cached entry is blanked before the download; presumably so that a
        # failed refresh does not leave a stale url/token behind.
        self.cache.store('nhk', 'api_info', {})
        movie_player_js = self._download_webpage(
            'https://movie-a.nhk.or.jp/world/player/js/movie-player.js', None,
            note='Downloading stream API information')
        # The third positional argument of _search_regex is the field name.
        # Passing None there (and the label as the fourth argument) would make
        # the label the *default*, i.e. a failed match would silently yield
        # the literal text 'stream API url' and get cached as if it were valid.
        api_info = {
            'url': self._search_regex(
                r'prod:[^;]+\bapiUrl:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, 'stream API url'),
            'token': self._search_regex(
                r'prod:[^;]+\btoken:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, 'stream API token'),
        }
        self.cache.store('nhk', 'api_info', api_info)
        return api_info

    def _extract_stream_info(self, vod_id):
        """Resolve formats, subtitles and timing metadata for ``vod_id``.

        The cached API info is tried first; if it is missing or does not lead
        to a stream URL, the info is refreshed and tried once more.

        Raises ExtractorError when neither attempt yields a stream URL.
        """
        for refresh in (False, True):
            api_info = self._get_api_info(refresh)
            if not api_info:
                continue

            # 'url' is removed so that the remaining keys can be sent as query
            # parameters; a cache entry without it is treated like a cache miss.
            api_url = api_info.pop('url', None)
            if not api_url:
                continue

            meta = traverse_obj(
                self._download_json(
                    api_url, vod_id, 'Downloading stream url info', fatal=False, query={
                        **api_info,
                        'type': 'json',
                        'optional_id': vod_id,
                        'active_flg': 1,
                    }), ('meta', 0))
            stream_url = traverse_obj(
                meta, ('movie_url', ('mb_auto', 'auto_sp', 'auto_pc'), {url_or_none}), get_all=False)

            if stream_url:
                formats, subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, vod_id)
                return {
                    **traverse_obj(meta, {
                        'duration': ('duration', {int_or_none}),
                        'timestamp': ('publication_date', {unified_timestamp}),
                        'release_timestamp': ('insert_date', {unified_timestamp}),
                        'modified_timestamp': ('update_date', {unified_timestamp}),
                    }),
                    'formats': formats,
                    'subtitles': subtitles,
                }
        raise ExtractorError('Unable to extract stream url')

    def _extract_episode_info(self, url, episode=None):
        """Build the info dict for the episode at ``url``.

        ``episode`` is the API record for the episode; when it is None the
        record is fetched through ``_call_api``. Video episodes are resolved
        to formats right away. Audio episodes are resolved only when the
        record was fetched here; otherwise a ``url_transparent`` result that
        points back to ``NhkVodIE`` is returned.

        Raises ExtractorError if the API returns no record for the episode.
        """
        fetch_episode = episode is None
        lang, m_type, episode_id = NhkVodIE._match_valid_url(url).group('lang', 'type', 'id')
        is_video = m_type != 'audio'

        if is_video:
            # The API expects video ids as '<4 chars>-<rest>'
            episode_id = episode_id[:4] + '-' + episode_id[4:]

        if fetch_episode:
            episodes = self._call_api(
                episode_id, lang, is_video, True, episode_id[:4] == '9999')
            # _call_api returns [] for an empty result; fail with a proper
            # extractor error instead of an IndexError
            if not episodes:
                raise ExtractorError(f'No episode data found for {episode_id}')
            episode = episodes[0]

        def get_clean_field(key):
            # Prefer the '<key>_clean' variant when the record provides one
            return clean_html(episode.get(key + '_clean') or episode.get(key))

        title = get_clean_field('sub_title')
        series = get_clean_field('title')

        thumbnails = []
        for s, w, h in [('', 640, 360), ('_l', 1280, 720)]:
            img_path = episode.get('image' + s)
            if not img_path:
                continue
            thumbnails.append({
                'id': f'{h}p',
                'height': h,
                'width': w,
                'url': 'https://www3.nhk.or.jp' + img_path,
            })

        episode_name = title
        if series and title:
            title = f'{series} - {title}'
        elif series and not title:
            # Only a series name is available: use it as the title
            title = series
            series = None
            episode_name = None
        else:  # title, no series
            episode_name = None

        info = {
            'id': episode_id + '-' + lang,
            'title': title,
            'description': get_clean_field('description'),
            'thumbnails': thumbnails,
            'series': series,
            'episode': episode_name,
        }

        if is_video:
            vod_id = episode['vod_id']
            info.update({
                **self._extract_stream_info(vod_id),
                'id': vod_id,
            })

        else:
            if fetch_episode:
                # From https://www3.nhk.or.jp/nhkworld/common/player/radio/inline/rod.html
                audio_path = remove_end(episode['audio']['audio'], '.m4a')
                info['formats'] = self._extract_m3u8_formats(
                    f'{urljoin("https://vod-stream.nhk.jp", audio_path)}/index.m3u8',
                    episode_id, 'm4a', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False)
                for f in info['formats']:
                    f['language'] = lang
            else:
                info.update({
                    '_type': 'url_transparent',
                    'ie_key': NhkVodIE.ie_key(),
                    'url': url,
                })
        return info
29f7c58a | 157 | |
158 | ||
class NhkVodIE(NhkBaseIE):
    # Single on-demand episode (video, clip or radio). Three URL layouts are
    # accepted; each pattern exposes the 'lang', 'type' and 'id' groups.
    _VALID_URL = [
        rf'{NhkBaseIE._BASE_URL_REGEX}shows/(?:(?P<type>video)/)?(?P<id>\d{{4}}[\da-z]\d+)/?(?:$|[?#])',
        rf'{NhkBaseIE._BASE_URL_REGEX}(?:ondemand|shows)/(?P<type>audio)/(?P<id>[^/?#]+?-\d{{8}}-[\da-z]+)',
        rf'{NhkBaseIE._BASE_URL_REGEX}ondemand/(?P<type>video)/(?P<id>\d{{4}}[\da-z]\d+)',  # deprecated
    ]
    # Content is only available for a limited period of time; see
    # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for samples that still work.
    _TESTS = [{
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2049126/',
        'info_dict': {
            'id': 'nw_vod_v_en_2049_126_20230413233000_01_1681398302',
            'ext': 'mp4',
            'title': 'Japan Railway Journal - The Tohoku Shinkansen: Full Speed Ahead',
            'description': 'md5:49f7c5b206e03868a2fdf0d0814b92f6',
            'thumbnail': r're:https://.+/.+\.jpg',
            'episode': 'The Tohoku Shinkansen: Full Speed Ahead',
            'series': 'Japan Railway Journal',
            'modified_timestamp': 1707217907,
            'timestamp': 1681428600,
            'release_timestamp': 1693883728,
            'duration': 1679,
            'upload_date': '20230413',
            'modified_date': '20240206',
            'release_date': '20230905',
        },
    }, {
        # video clip
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
        'md5': '153c3016dfd252ba09726588149cf0e7',
        'info_dict': {
            'id': 'lpZXIwaDE6_Z-976CPsFdxyICyWUzlT5',
            'ext': 'mp4',
            'title': 'Dining with the Chef - Chef Saito\'s Family recipe: MENCHI-KATSU',
            'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
            'thumbnail': r're:https://.+/.+\.jpg',
            'series': 'Dining with the Chef',
            'episode': 'Chef Saito\'s Family recipe: MENCHI-KATSU',
            'duration': 148,
            'upload_date': '20190816',
            'release_date': '20230902',
            'release_timestamp': 1693619292,
            'modified_timestamp': 1707217907,
            'modified_date': '20240206',
            'timestamp': 1565997540,
        },
    }, {
        # radio
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/livinginjapan-20231001-1/',
        'info_dict': {
            'id': 'livinginjapan-20231001-1-en',
            'ext': 'm4a',
            'title': 'Living in Japan - Tips for Travelers to Japan / Ramen Vending Machines',
            'series': 'Living in Japan',
            'description': 'md5:0a0e2077d8f07a03071e990a6f51bfab',
            'thumbnail': r're:https://.+/.+\.jpg',
            'episode': 'Tips for Travelers to Japan / Ramen Vending Machines',
        },
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
        'only_matching': True,
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/plugin-20190404-1/',
        'only_matching': True,
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
        'only_matching': True,
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
        'only_matching': True,
    }, {
        # video, alphabetic character in ID #29670
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a34/',
        'info_dict': {
            'id': 'qfjay6cg',
            'ext': 'mp4',
            'title': 'DESIGN TALKS plus - Fishermen’s Finery',
            'description': 'md5:8a8f958aaafb0d7cb59d38de53f1e448',
            'thumbnail': r're:^https?:/(/[a-z0-9.-]+)+\.jpg\?w=1920&h=1080$',
            'upload_date': '20210615',
            'timestamp': 1623722008,
        },
        'skip': '404 Not Found',
    }, {
        # japanese-language, longer id than english
        'url': 'https://www3.nhk.or.jp/nhkworld/ja/ondemand/video/0020271111/',
        'info_dict': {
            'id': 'nw_ja_v_jvod_ohayou_20231008',
            'ext': 'mp4',
            'title': 'おはよう日本(7時台) - 10月8日放送',
            'series': 'おはよう日本(7時台)',
            'episode': '10月8日放送',
            'thumbnail': r're:https://.+/.+\.jpg',
            'description': 'md5:9c1d6cbeadb827b955b20e99ab920ff0',
        },
        'skip': 'expires 2023-10-15',
    }, {
        # a one-off (single-episode series). title from the api is just '<p></p>'
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/3004952/',
        'info_dict': {
            'id': 'nw_vod_v_en_3004_952_20230723091000_01_1690074552',
            'ext': 'mp4',
            'title': 'Barakan Discovers - AMAMI OSHIMA: Isson\'s Treasure Isla',
            'description': 'md5:5db620c46a0698451cc59add8816b797',
            'thumbnail': r're:https://.+/.+\.jpg',
            'release_date': '20230905',
            'timestamp': 1690103400,
            'duration': 2939,
            'release_timestamp': 1693898699,
            'upload_date': '20230723',
            'modified_timestamp': 1707217907,
            'modified_date': '20240206',
            'episode': 'AMAMI OSHIMA: Isson\'s Treasure Isla',
            'series': 'Barakan Discovers',
        },
    }, {
        # /ondemand/video/ url with alphabetical character in 5th position of id
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a07/',
        'info_dict': {
            'id': 'nw_c_en_9999-a07',
            'ext': 'mp4',
            'episode': 'Mini-Dramas on SDGs: Ep 1 Close the Gender Gap [Director\'s Cut]',
            'series': 'Mini-Dramas on SDGs',
            'modified_date': '20240206',
            'title': 'Mini-Dramas on SDGs - Mini-Dramas on SDGs: Ep 1 Close the Gender Gap [Director\'s Cut]',
            'description': 'md5:3f9dcb4db22fceb675d90448a040d3f6',
            'timestamp': 1621962360,
            'duration': 189,
            'release_date': '20230903',
            'modified_timestamp': 1707217907,
            'upload_date': '20210525',
            'thumbnail': r're:https://.+/.+\.jpg',
            'release_timestamp': 1693713487,
        },
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999d17/',
        'info_dict': {
            'id': 'nw_c_en_9999-d17',
            'ext': 'mp4',
            'title': 'Flowers of snow blossom - The 72 Pentads of Yamato',
            'description': 'Today’s focus: Snow',
            'release_timestamp': 1693792402,
            'release_date': '20230904',
            'upload_date': '20220128',
            'timestamp': 1643370960,
            'thumbnail': r're:https://.+/.+\.jpg',
            'duration': 136,
            'series': '',
            'modified_date': '20240206',
            'modified_timestamp': 1707217907,
        },
    }, {
        # new /shows/ url format
        'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/2032307/',
        'info_dict': {
            'id': 'nw_vod_v_en_2032_307_20240321113000_01_1710990282',
            'ext': 'mp4',
            'title': 'Japanology Plus - 20th Anniversary Special Part 1',
            'description': 'md5:817d41fc8e54339ad2a916161ea24faf',
            'episode': '20th Anniversary Special Part 1',
            'series': 'Japanology Plus',
            'thumbnail': r're:https://.+/.+\.jpg',
            'duration': 1680,
            'timestamp': 1711020600,
            'upload_date': '20240321',
            'release_timestamp': 1711022683,
            'release_date': '20240321',
            'modified_timestamp': 1711031012,
            'modified_date': '20240321',
        },
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/3020025/',
        'info_dict': {
            'id': 'nw_vod_v_en_3020_025_20230325144000_01_1679723944',
            'ext': 'mp4',
            'title': '100 Ideas to Save the World - Working Styles Evolve',
            'description': 'md5:9e6c7778eaaf4f7b4af83569649f84d9',
            'episode': 'Working Styles Evolve',
            'series': '100 Ideas to Save the World',
            'thumbnail': r're:https://.+/.+\.jpg',
            'duration': 899,
            'upload_date': '20230325',
            'timestamp': 1679755200,
            'release_date': '20230905',
            'release_timestamp': 1693880540,
            'modified_date': '20240206',
            'modified_timestamp': 1707217907,
        },
    }, {
        # new /shows/audio/ url format
        'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/audio/livinginjapan-20231001-1/',
        'only_matching': True,
    }, {
        # valid url even if can't be found in wild; support needed for clip entries extraction
        'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/9999o80/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract one episode; the API record is fetched by the base-class helper."""
        # No pre-fetched record is supplied, so the helper queries the API itself
        return self._extract_episode_info(url, episode=None)
359 | ||
360 | ||
class NhkVodProgramIE(NhkBaseIE):
    # Program (series) page: yields a playlist with one entry per episode
    # returned by the list API.
    _VALID_URL = rf'''(?x)
        {NhkBaseIE._BASE_URL_REGEX}(?:shows|tv)/
        (?:(?P<type>audio)/programs/)?(?P<id>\w+)/?
        (?:\?(?:[^#]+&)?type=(?P<episode_type>clip|(?:radio|tv)Episode))?'''
    _TESTS = [{
        # video program episodes
        'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/sumo/',
        'info_dict': {
            'id': 'sumo',
            'title': 'GRAND SUMO Highlights',
            'description': 'md5:fc20d02dc6ce85e4b72e0273aa52fdbf',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/japanrailway/',
        'info_dict': {
            'id': 'japanrailway',
            'title': 'Japan Railway Journal',
            'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f',
        },
        'playlist_mincount': 12,
    }, {
        # video program clips
        'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/japanrailway/?type=clip',
        'info_dict': {
            'id': 'japanrailway',
            'title': 'Japan Railway Journal',
            'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f',
        },
        'playlist_mincount': 12,
    }, {
        # audio program
        'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/audio/programs/livinginjapan/',
        'info_dict': {
            'id': 'livinginjapan',
            'title': 'Living in Japan',
            'description': 'md5:665bb36ec2a12c5a7f598ee713fc2b54',
        },
        'playlist_mincount': 12,
    }, {
        # /tv/ program url
        'url': 'https://www3.nhk.or.jp/nhkworld/en/tv/designtalksplus/',
        'info_dict': {
            'id': 'designtalksplus',
            'title': 'DESIGN TALKS plus',
            'description': 'md5:47b3b3a9f10d4ac7b33b53b70a7d2837',
        },
        'playlist_mincount': 20,
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/10yearshayaomiyazaki/',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline every URL that NhkVodIE already handles; otherwise defer to the default check."""
        return not NhkVodIE.suitable(url) and super().suitable(url)

    def _extract_meta_from_class_elements(self, class_values, html):
        """Return the cleaned text of the first listed class that yields non-empty text, else None."""
        cleaned_texts = (
            clean_html(get_element_by_class(css_class, html))
            for css_class in class_values)
        return next((text for text in cleaned_texts if text), None)

    def _real_extract(self, url):
        """Build the playlist for a program page from the list API plus the page's own title/description."""
        mobj = self._match_valid_url(url)
        lang = mobj.group('lang')
        m_type = mobj.group('type')
        program_id = mobj.group('id')
        episode_type = mobj.group('episode_type')

        is_video = m_type != 'audio'
        wants_clips = episode_type == 'clip'
        program_episodes = self._call_api(program_id, lang, is_video, False, wants_clips)

        def iter_entries():
            # Records without a 'url' cannot be mapped to an episode page and are skipped
            for record in program_episodes:
                relative_path = record.get('url')
                if not relative_path:
                    continue
                yield self._extract_episode_info(urljoin(url, relative_path), record)

        html = self._download_webpage(url, program_id)

        title_classes = [
            'p-programDetail__title',  # /ondemand/program/
            'pProgramHero__logoText',  # /shows/
            'tAudioProgramMain__title',  # /shows/audio/programs/
            'p-program-name',  # /tv/
        ]
        description_classes = [
            'p-programDetail__text',  # /ondemand/program/
            'pProgramHero__description',  # /shows/
            'tAudioProgramMain__info',  # /shows/audio/programs/
            'p-program-description',  # /tv/
        ]
        program_title = self._extract_meta_from_class_elements(title_classes, html)
        program_description = self._extract_meta_from_class_elements(description_classes, html)

        return self.playlist_result(iter_entries(), program_id, program_title, program_description)
77cc7c6e LNO |
447 | |
448 | ||
class NhkForSchoolBangumiIE(InfoExtractor):
    # Single NHK for School movie page (bangumi.cgi or clip.cgi), identified by its das_id.
    _VALID_URL = r'https?://www2\.nhk\.or\.jp/school/movie/(?P<type>bangumi|clip)\.cgi\?das_id=(?P<id>[a-zA-Z0-9_-]+)'
    _TESTS = [{
        'url': 'https://www2.nhk.or.jp/school/movie/bangumi.cgi?das_id=D0005150191_00000',
        'info_dict': {
            'id': 'D0005150191_00003',
            'title': 'にている かな',
            'duration': 599.999,
            'timestamp': 1396414800,

            'upload_date': '20140402',
            'ext': 'mp4',

            'chapters': 'count:12',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Scrape the player page's inline JavaScript for metadata and chapters.

        The id in the result may differ from the one in the URL: when the page
        declares a version, the part after the first underscore is replaced by it.
        """
        program_type, video_id = self._match_valid_url(url).groups()

        webpage = self._download_webpage(
            f'https://www2.nhk.or.jp/school/movie/{program_type}.cgi?das_id={video_id}', video_id)

        # searches all variables
        base_values = {g.group(1): g.group(2) for g in re.finditer(r'var\s+([a-zA-Z_]+)\s*=\s*"([^"]+?)";', webpage)}
        # and programObj values too
        program_values = {g.group(1): g.group(3) for g in re.finditer(r'(?:program|clip)Obj\.([a-zA-Z_]+)\s*=\s*(["\'])([^"]+?)\2;', webpage)}
        # extract all chapters
        chapter_durations = [parse_duration(g.group(1)) for g in re.finditer(r'chapterTime\.push\(\'([0-9:]+?)\'\);', webpage)]
        chapter_titles = [' '.join([g.group(1) or '', unescapeHTML(g.group(2))]).strip() for g in re.finditer(r'<div class="cpTitle"><span>(scene\s*\d+)?</span>([^<]+?)</div>', webpage)]

        # this is how player_core.js is actually doing (!)
        version = base_values.get('r_version') or program_values.get('version')
        if version:
            video_id = f'{video_id.split("_")[0]}_{version}'

        formats = self._extract_m3u8_formats(
            f'https://nhks-vh.akamaihd.net/i/das/{video_id[0:8]}/{video_id}_V_000.f4v/master.m3u8',
            video_id, ext='mp4', m3u8_id='hls')

        duration = parse_duration(base_values.get('r_duration'))

        chapters = None
        # Chapters are only emitted when every start time has a matching title
        if chapter_durations and chapter_titles and len(chapter_durations) == len(chapter_titles):
            # Each chapter ends where the next one starts; the last ends at the total duration
            end_times = [*chapter_durations[1:], duration]
            chapters = [{
                'start_time': start,
                'end_time': end,
                'title': chapter_title,
            } for start, end, chapter_title in zip(chapter_durations, end_times, chapter_titles)]

        return {
            'id': video_id,
            'title': program_values.get('name'),
            # Reuse the value parsed above instead of parsing r_duration a second time
            'duration': duration,
            # The upload date is optional metadata: a page without r_upload must
            # not abort extraction with a KeyError
            'timestamp': unified_timestamp(base_values.get('r_upload')),
            'formats': formats,
            'chapters': chapters,
        }
513 | ||
514 | ||
class NhkForSchoolSubjectIE(InfoExtractor):
    # Subject portal page; produces a playlist of the program pages linked from it.
    IE_DESC = 'Portal page for each school subjects, like Japanese (kokugo, 国語) or math (sansuu/suugaku or 算数・数学)'
    # Subject slugs accepted in the URL path
    KNOWN_SUBJECTS = (
        'rika', 'syakai', 'kokugo',
        'sansuu', 'seikatsu', 'doutoku',
        'ongaku', 'taiiku', 'zukou',
        'gijutsu', 'katei', 'sougou',
        'eigo', 'tokkatsu',
        'tokushi', 'sonota',
    )
    _VALID_URL = r'https?://www\.nhk\.or\.jp/school/(?P<id>{})/?(?:[\?#].*)?$'.format(
        '|'.join(map(re.escape, KNOWN_SUBJECTS)))

    _TESTS = [{
        'url': 'https://www.nhk.or.jp/school/sougou/',
        'info_dict': {
            'id': 'sougou',
            'title': '総合的な学習の時間',
        },
        'playlist_mincount': 16,
    }, {
        'url': 'https://www.nhk.or.jp/school/rika/',
        'info_dict': {
            'id': 'rika',
            'title': '理科',
        },
        'playlist_mincount': 15,
    }]

    def _real_extract(self, url):
        """Collect every link of the form /school/<subject>/<name>/ on the subject page."""
        subject_id = self._match_id(url)
        webpage = self._download_webpage(url, subject_id)

        # Links may be absolute or site-relative; both are resolved against the page URL below
        link_matches = re.finditer(
            rf'href="((?:https?://www\.nhk\.or\.jp)?/school/{re.escape(subject_id)}/[^/]+/)"', webpage)
        subject_title = self._html_search_regex(
            r'(?s)<span\s+class="subjectName">\s*<img\s*[^<]+>\s*([^<]+?)</span>', webpage, 'title', fatal=False)

        def resolve_link(match):
            return urljoin(url, match.group(1))

        return self.playlist_from_matches(link_matches, subject_id, subject_title, resolve_link)
553 | ||
554 | ||
class NhkForSchoolProgramListIE(InfoExtractor):
    # Program page below a subject; produces a playlist of its bangumi movies.
    _VALID_URL = r'https?://www\.nhk\.or\.jp/school/(?P<id>(?:{})/[a-zA-Z0-9_-]+)'.format(
        '|'.join(map(re.escape, NhkForSchoolSubjectIE.KNOWN_SUBJECTS)))
    _TESTS = [{
        'url': 'https://www.nhk.or.jp/school/sougou/q/',
        'info_dict': {
            'id': 'sougou/q',
            'title': 'Q~こどものための哲学',
        },
        'playlist_mincount': 20,
    }]

    def _real_extract(self, url):
        """Read title/description from the program page and the movie ids from its program.json."""
        program_id = self._match_id(url)

        webpage = self._download_webpage(f'https://www.nhk.or.jp/school/{program_id}/', program_id)

        # Prefer the generic page title; fall back to the heading of the '... とは' section
        title = self._generic_title('', webpage)
        if not title:
            title = self._html_search_regex(r'<h3>([^<]+?)とは?\s*</h3>', webpage, 'title', fatal=False)
        if title:
            # Drop the trailing site-name suffix
            title = re.sub(r'\s*\|\s*NHK\s+for\s+School\s*$', '', title)
        else:
            title = None

        description = self._html_search_regex(
            r'(?s)<div\s+class="programDetail\s*">\s*<p>[^<]+</p>',
            webpage, 'description', fatal=False, group=0)

        bangumi_list = self._download_json(
            f'https://www.nhk.or.jp/school/{program_id}/meta/program.json', program_id)
        # they're always bangumi
        das_ids = traverse_obj(bangumi_list, ('part', ..., 'part-video-dasid')) or []
        bangumis = [
            self.url_result(f'https://www2.nhk.or.jp/school/movie/bangumi.cgi?das_id={das_id}')
            for das_id in das_ids]

        return self.playlist_result(bangumis, program_id, title, description)
8f0be90e | 587 | |
588 | ||
589 | class NhkRadiruIE(InfoExtractor): | |
590 | _GEO_COUNTRIES = ['JP'] | |
591 | IE_DESC = 'NHK らじる (Radiru/Rajiru)' | |
592 | _VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?' | |
593 | _TESTS = [{ | |
b8e2a5e0 | 594 | 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_4003239', |
595 | 'skip': 'Episode expired on 2024-06-09', | |
8f0be90e | 596 | 'info_dict': { |
b8e2a5e0 | 597 | 'title': 'ジャズ・トゥナイト ジャズ「Night and Day」特集', |
598 | 'id': '0449_01_4003239', | |
8f0be90e | 599 | 'ext': 'm4a', |
b8e2a5e0 | 600 | 'uploader': 'NHK FM 東京', |
601 | 'description': 'md5:ad05f3c3f3f6e99b2e69f9b5e49551dc', | |
8f0be90e | 602 | 'series': 'ジャズ・トゥナイト', |
b8e2a5e0 | 603 | 'channel': 'NHK FM 東京', |
8f0be90e | 604 | 'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg', |
b8e2a5e0 | 605 | 'upload_date': '20240601', |
606 | 'series_id': '0449_01', | |
607 | 'release_date': '20240601', | |
608 | 'timestamp': 1717257600, | |
609 | 'release_timestamp': 1717250400, | |
8f0be90e | 610 | }, |
611 | }, { | |
612 | # playlist, airs every weekday so it should _hopefully_ be okay forever | |
613 | 'url': 'https://www.nhk.or.jp/radio/ondemand/detail.html?p=0458_01', | |
614 | 'info_dict': { | |
615 | 'id': '0458_01', | |
616 | 'title': 'ベストオブクラシック', | |
617 | 'description': '世界中の上質な演奏会をじっくり堪能する本格派クラシック番組。', | |
8f0be90e | 618 | 'thumbnail': 'https://www.nhk.or.jp/prog/img/458/g458.jpg', |
b8e2a5e0 | 619 | 'series_id': '0458_01', |
620 | 'uploader': 'NHK FM', | |
621 | 'channel': 'NHK FM', | |
622 | 'series': 'ベストオブクラシック', | |
8f0be90e | 623 | }, |
624 | 'playlist_mincount': 3, | |
625 | }, { | |
626 | # one with letters in the id | |
b8e2a5e0 | 627 | 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F683_01_3910688', |
628 | 'note': 'Expires on 2025-03-31', | |
8f0be90e | 629 | 'info_dict': { |
b8e2a5e0 | 630 | 'id': 'F683_01_3910688', |
8f0be90e | 631 | 'ext': 'm4a', |
b8e2a5e0 | 632 | 'title': '夏目漱石「文鳥」第1回', |
633 | 'series': '【らじる文庫】夏目漱石「文鳥」(全4回)', | |
634 | 'series_id': 'F683_01', | |
635 | 'description': '朗読:浅井理アナウンサー', | |
636 | 'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F683/img/roudoku_05_rod_640.jpg', | |
637 | 'upload_date': '20240106', | |
638 | 'release_date': '20240106', | |
639 | 'uploader': 'NHK R1', | |
640 | 'release_timestamp': 1704511800, | |
641 | 'channel': 'NHK R1', | |
642 | 'timestamp': 1704512700, | |
4392447d | 643 | }, |
b8e2a5e0 | 644 | 'expected_warnings': ['Unable to download JSON metadata', |
645 | 'Failed to get extended metadata. API returned Error 1: Invalid parameters'], | |
8f0be90e | 646 | }, { |
647 | # news | |
b8e2a5e0 | 648 | 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_4012173', |
8f0be90e | 649 | 'info_dict': { |
b8e2a5e0 | 650 | 'id': 'F261_01_4012173', |
8f0be90e | 651 | 'ext': 'm4a', |
652 | 'channel': 'NHKラジオ第1', | |
54579be4 | 653 | 'uploader': 'NHKラジオ第1', |
8f0be90e | 654 | 'series': 'NHKラジオニュース', |
b8e2a5e0 | 655 | 'title': '午前0時のNHKニュース', |
8f0be90e | 656 | 'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg', |
b8e2a5e0 | 657 | 'release_timestamp': 1718290800, |
658 | 'release_date': '20240613', | |
659 | 'timestamp': 1718291400, | |
660 | 'upload_date': '20240613', | |
8f0be90e | 661 | }, |
b8e2a5e0 | 662 | }, { |
663 | # fallback when extended metadata fails | |
664 | 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=2834_01_4009298', | |
665 | 'skip': 'Expires on 2024-06-07', | |
666 | 'info_dict': { | |
667 | 'id': '2834_01_4009298', | |
668 | 'title': 'まち☆キラ!開成町特集', | |
669 | 'ext': 'm4a', | |
670 | 'release_date': '20240531', | |
671 | 'upload_date': '20240531', | |
672 | 'series': 'はま☆キラ!', | |
673 | 'thumbnail': 'https://www.nhk.or.jp/prog/img/2834/g2834.jpg', | |
674 | 'channel': 'NHK R1,FM', | |
675 | 'description': '', | |
676 | 'timestamp': 1717123800, | |
677 | 'uploader': 'NHK R1,FM', | |
678 | 'release_timestamp': 1717120800, | |
679 | 'series_id': '2834_01', | |
680 | }, | |
681 | 'expected_warnings': ['Failed to get extended metadata. API returned empty list.'], | |
8f0be90e | 682 | }] |
683 | ||
4392447d | 684 | _API_URL_TMPL = None |
685 | ||
def _extract_extended_metadata(self, episode_id, aa_vinfo):
    """Query the programme-detail API and map its answer to info-dict fields.

    ``aa_vinfo`` is indexed positionally: entry 2 is read as
    ``<service>,<area>`` and entry 3 is passed to the URL template as the
    date ID.

    Returns an empty dict when the detail URL cannot be built, when the
    download fails, when the API answers with an ``error`` object, or when
    the result list for the service is empty (the last two also emit a
    warning). Otherwise returns the dict produced by ``filter_dict``.
    """
    # The `or ''` keeps the three-way unpacking valid when entry 2 is absent
    service, _sep, area = traverse_obj(
        aa_vinfo, (2, {str}, {lambda x: (x or '').partition(',')}))

    def build_detail_url():
        return self._API_URL_TMPL.format(area=area, service=service, dateid=aa_vinfo[3])

    # try_call is used here so a missing template or a short aa_vinfo yields no URL
    detail_url = try_call(build_detail_url)
    if not detail_url:
        return {}

    # NOTE(review): HTTP 400 is accepted, presumably so the JSON error body can be inspected below
    response = self._download_json(
        detail_url, episode_id, note='Downloading extended metadata',
        errnote='Failed to download extended metadata', fatal=False, expected_status=400)
    if not response:
        return {}

    error = traverse_obj(response, ('error', {dict}))
    if error:
        self.report_warning(
            'Failed to get extended metadata. API returned '
            f'Error {join_nonempty("code", "message", from_dict=error, delim=": ")}')
        return {}

    full_meta = traverse_obj(response, ('list', service, 0, {dict}))
    if not full_meta:
        self.report_warning('Failed to get extended metadata. API returned empty list.')
        return {}

    station = ' '.join(traverse_obj(full_meta, (('service', 'area'), 'name', {str}))) or None

    def rank_image(image_id):
        # 'thumbnail*' images are preferred, 'logo*' images come last
        if image_id.startswith('thumbnail'):
            return 1
        if image_id.startswith('logo'):
            return -2
        return -1

    thumbnails = []
    # Only images that actually carry a URL are kept
    for image_id, image in traverse_obj(full_meta, ('images', {dict.items}, lambda _, v: v[1]['url'])):
        thumbnails.append({
            'id': str(image_id),
            'preference': rank_image(image_id),
            **traverse_obj(image, {
                'url': 'url',
                'width': ('width', {int_or_none}),
                'height': ('height', {int_or_none}),
            }),
        })

    info = {
        'channel': station,
        'uploader': station,
        'description': join_nonempty(
            'subtitle', 'content', 'act', 'music', delim='\n\n', from_dict=full_meta),
        'thumbnails': thumbnails,
    }
    info.update(traverse_obj(full_meta, {
        'title': ('title', {str}),
        'timestamp': ('end_time', {unified_timestamp}),
        'release_timestamp': ('start_time', {unified_timestamp}),
    }))
    return filter_dict(info)
733 | ||
def _extract_episode_info(self, episode, programme_id, series_meta):
    """Assemble the info dict for a single on-demand episode.

    The result starts from ``series_meta``, adds the episode's own fields
    (with timestamps taken from ``aa_contents_id`` as a fallback), and is
    finally overridden by whatever ``_extract_extended_metadata`` returns.
    """
    episode_id = f'{programme_id}_{episode["id"]}'
    # 'aa_contents_id' is split on ';' into positional fields
    aa_vinfo = traverse_obj(episode, ('aa_contents_id', {lambda x: x.split(';')}))
    extended_metadata = self._extract_extended_metadata(episode_id, aa_vinfo)
    # Field 4 is read as '<start>_<end>'; the `or ''` keeps the unpacking valid when it is absent
    fallback_start_time, _sep, fallback_end_time = traverse_obj(
        aa_vinfo, (4, {str}, {lambda x: (x or '').partition('_')}))

    info = dict(series_meta)
    info.update({
        'id': episode_id,
        'formats': self._extract_m3u8_formats(episode.get('stream_url'), episode_id, fatal=False),
        'container': 'm4a_dash',  # force fixup, AAC-only HLS
        'was_live': True,
        'title': episode.get('program_title'),
        'description': episode.get('program_sub_title'),  # fallback
        'timestamp': unified_timestamp(fallback_end_time),
        'release_timestamp': unified_timestamp(fallback_start_time),
    })
    # Extended metadata goes last so that it wins over the fallbacks above
    info.update(extended_metadata)
    return info
4392447d | 753 | |
def _extract_news_info(self, headline, programme_id, series_meta):
    """Assemble the info dict for a single news headline.

    ``headline`` must contain ``headline_id``; the media details are read
    from the first entry of its ``file_list``. A headline without a usable
    ``file_list`` entry no longer raises: it yields an entry with no
    formats and without the per-file fields.
    """
    episode_id = f'{programme_id}_{headline["headline_id"]}'
    # traverse_obj returns None when 'file_list' is missing or empty; fall back
    # to an empty dict so the .get() below cannot raise AttributeError
    episode = traverse_obj(headline, ('file_list', 0, {dict})) or {}

    return {
        **series_meta,
        'id': episode_id,
        'formats': self._extract_m3u8_formats(episode.get('file_name'), episode_id, fatal=False),
        'container': 'm4a_dash',  # force fixup, AAC-only HLS
        'was_live': True,
        'series': series_meta.get('title'),
        # Prefer the headline's own image, otherwise reuse the series thumbnail
        'thumbnail': url_or_none(headline.get('headline_image')) or series_meta.get('thumbnail'),
        **traverse_obj(episode, {
            'title': ('file_title', {str}),
            'description': ('file_title_sub', {str}),
            'timestamp': ('open_time', {unified_timestamp}),
            # Only the part of 'aa_vinfo4' before the first '_' is parsed
            'release_timestamp': ('aa_vinfo4', {lambda x: x.split('_')[0]}, {unified_timestamp}),
        }),
    }
773 | ||
def _real_initialize(self):
    """Look up the programme-detail URL template once and store it on the class.

    Does nothing when ``_API_URL_TMPL`` is already set. The config download
    is non-fatal, and the template is built through ``try_call``.
    """
    if not self._API_URL_TMPL:
        api_config = self._download_xml(
            'https://www.nhk.or.jp/radio/config/config_web.xml', None, 'Downloading API config', fatal=False)

        def detail_url_template():
            # The config value is prefixed with 'https:' to form the template
            return f'https:{api_config.find(".//url_program_detail").text}'

        # Stored on the class rather than on the instance
        NhkRadiruIE._API_URL_TMPL = try_call(detail_url_template)
780 | ||
def _real_extract(self, url):
    """Extract a single episode or a whole programme playlist.

    Site ``F261`` is served from the news list JSON; every other site goes
    through the on-demand series API. In both cases a headline ID in the URL
    selects one entry, and its absence yields a playlist of all entries.

    Raises ExtractorError (expected) when the requested headline/episode is
    not present in the API response.
    """
    mobj = self._match_valid_url(url)
    site_id, corner_id, headline_id = mobj.group('site', 'corner', 'headline')
    programme_id = f'{site_id}_{corner_id}'

    if site_id == 'F261':  # XXX: News programmes use old API (for now?)
        news_meta = self._download_json(
            'https://www.nhk.or.jp/s-media/news/news-site/list/v1/all.json', programme_id)['main']
        news_series_meta = traverse_obj(news_meta, {
            'title': ('program_name', {str}),
            'channel': ('media_name', {str}),
            'uploader': ('media_name', {str}),
            'thumbnail': (('thumbnail_c', 'thumbnail_p'), {url_or_none}),
        }, get_all=False)

        if not headline_id:
            def news_entries():
                for item in traverse_obj(news_meta, ('detail_list', ..., {dict})):
                    yield self._extract_news_info(item, programme_id, news_series_meta)

            return self.playlist_result(
                news_entries(), programme_id, description=news_meta.get('site_detail'), **news_series_meta)

        headline = traverse_obj(
            news_meta, ('detail_list', lambda _, v: v['headline_id'] == headline_id, any))
        if headline:
            return self._extract_news_info(headline, programme_id, news_series_meta)
        raise ExtractorError('Content not found; it has most likely expired', expected=True)

    meta = self._download_json(
        'https://www.nhk.or.jp/radio-api/app/v1/web/ondemand/series', programme_id, query={
            'site_id': site_id,
            'corner_site_id': corner_id,
        })

    # Station name used for channel/uploader in the series metadata
    fallback_station = join_nonempty('NHK', traverse_obj(meta, ('radio_broadcast', {str})), delim=' ')
    series_meta = {
        'series': join_nonempty('title', 'corner_name', delim=' ', from_dict=meta),
        'series_id': programme_id,
        'thumbnail': traverse_obj(meta, ('thumbnail_url', {url_or_none})),
        'channel': fallback_station,
        'uploader': fallback_station,
    }

    if not headline_id:
        def entries():
            for item in traverse_obj(meta, ('episodes', ..., {dict})):
                yield self._extract_episode_info(item, programme_id, series_meta)

        return self.playlist_result(
            entries(), programme_id, title=series_meta.get('series'),
            description=meta.get('series_description'), **series_meta)

    # The URL component is converted with int() before comparing against the episode 'id'
    episode = traverse_obj(meta, ('episodes', lambda _, v: v['id'] == int(headline_id), any))
    if episode:
        return self._extract_episode_info(episode, programme_id, series_meta)
    raise ExtractorError('Content not found; it has most likely expired', expected=True)
8f0be90e | 837 | |
838 | ||
class NhkRadioNewsPageIE(InfoExtractor):
    """Redirect the NHK radio news landing page to its on-demand programme page."""

    _VALID_URL = r'https?://www\.nhk\.or\.jp/radionews/?(?:$|[?#])'
    _TESTS = [{
        # airs daily, on-the-hour most hours
        'url': 'https://www.nhk.or.jp/radionews/',
        'info_dict': {
            'id': 'F261_01',
            'title': 'NHKラジオニュース',
            'description': 'md5:bf2c5b397e44bc7eb26de98d8f15d79d',
            'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
            'channel': 'NHKラジオ第1',
            'uploader': 'NHKラジオ第1',
        },
        'playlist_mincount': 5,
    }]

    def _real_extract(self, url):
        """Hand extraction over to NhkRadiruIE using the fixed F261_01 programme URL."""
        ondemand_url = 'https://www.nhk.or.jp/radio/ondemand/detail.html?p=F261_01'
        return self.url_result(ondemand_url, NhkRadiruIE)
81c8b9bd | 857 | |
858 | ||
class NhkRadiruLiveIE(InfoExtractor):
    """Extractor for the NHK live radio player (channels ``r1``, ``r2`` and ``fm``).

    The broadcast area is taken from the ``area`` extractor argument and
    defaults to ``tokyo``.
    """

    _GEO_COUNTRIES = ['JP']
    _VALID_URL = r'https?://www\.nhk\.or\.jp/radio/player/\?ch=(?P<id>r[12]|fm)'
    _TESTS = [{
        # radio 1, no area specified
        'url': 'https://www.nhk.or.jp/radio/player/?ch=r1',
        'info_dict': {
            'id': 'r1-tokyo',
            'title': 're:^NHKネットラジオ第1 東京.+$',
            'ext': 'm4a',
            'thumbnail': 'https://www.nhk.or.jp/common/img/media/r1-200x200.png',
            'live_status': 'is_live',
        },
    }, {
        # radio 2, area specified
        # (the area doesn't actually matter, r2 is national)
        'url': 'https://www.nhk.or.jp/radio/player/?ch=r2',
        'params': {'extractor_args': {'nhkradirulive': {'area': ['fukuoka']}}},
        'info_dict': {
            'id': 'r2-fukuoka',
            'title': 're:^NHKネットラジオ第2 福岡.+$',
            'ext': 'm4a',
            'thumbnail': 'https://www.nhk.or.jp/common/img/media/r2-200x200.png',
            'live_status': 'is_live',
        },
    }, {
        # fm, area specified
        'url': 'https://www.nhk.or.jp/radio/player/?ch=fm',
        'params': {'extractor_args': {'nhkradirulive': {'area': ['sapporo']}}},
        'info_dict': {
            'id': 'fm-sapporo',
            'title': 're:^NHKネットラジオFM 札幌.+$',
            'ext': 'm4a',
            'thumbnail': 'https://www.nhk.or.jp/common/img/media/fm-200x200.png',
            'live_status': 'is_live',
        },
    }]

    # Maps the channel ID from the URL to the key used in the now-on-air list
    _NOA_STATION_IDS = {'r1': 'n1', 'r2': 'n2', 'fm': 'n3'}

    def _real_extract(self, url):
        """Resolve the live stream for the requested channel and area.

        Raises ExtractorError (expected) listing the valid areas when the
        configured area is not present in the config XML. The now-on-air
        metadata download is non-fatal; title and thumbnails are derived
        from it when available.
        """
        station = self._match_id(url)
        area = self._configuration_arg('area', ['tokyo'])[0]

        config = self._download_xml(
            'https://www.nhk.or.jp/radio/config/config_web.xml', station, 'Downloading area information')
        # Select the parent element of the <area> node whose text equals the requested area
        data = config.find(f'.//data//area[.="{area}"]/..')

        # find() returns None when nothing matches. Compare explicitly: the truth
        # value of an Element reflects its number of children, not whether a match
        # was found, and testing it is deprecated since Python 3.12.
        if data is None:
            raise ExtractorError('Invalid area. Valid areas are: {}'.format(', '.join(
                [i.text for i in config.findall('.//data//area')])), expected=True)

        # NOTE(review): the config value is presumably a scheme-less URL template
        # containing an '{area}' placeholder, hence the 'https:' prefix and .format()
        noa_info = self._download_json(
            f'https:{config.find(".//url_program_noa").text}'.format(area=data.find('areakey').text),
            station, note=f'Downloading {area} station metadata', fatal=False)
        present_info = traverse_obj(noa_info, ('nowonair_list', self._NOA_STATION_IDS.get(station), 'present'))

        return {
            'title': ' '.join(traverse_obj(present_info, (('service', 'area'), 'name', {str}))),
            'id': join_nonempty(station, area),
            'thumbnails': traverse_obj(present_info, ('service', 'images', ..., {
                'url': 'url',
                'width': ('width', {int_or_none}),
                'height': ('height', {int_or_none}),
            })),
            # The stream URL lives in the '<station>hls' child of the area's data element
            'formats': self._extract_m3u8_formats(data.find(f'{station}hls').text, station),
            'is_live': True,
        }